diff options
Diffstat (limited to 'lib/Transforms')
86 files changed, 8170 insertions, 6252 deletions
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index d07f6135257f..8000d0d2ff4a 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -16,7 +16,7 @@  #include "llvm/Pass.h"  #include "llvm/Function.h"  #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/Statistic.h"  using namespace llvm; @@ -32,7 +32,7 @@ namespace {        HelloCounter++;        std::string fname = F.getName();        EscapeString(fname); -      cerr << "Hello: " << fname << "\n"; +      errs() << "Hello: " << fname << "\n";        return false;      }    }; @@ -51,7 +51,7 @@ namespace {        HelloCounter++;        std::string fname = F.getName();        EscapeString(fname); -      cerr << "Hello: " << fname << "\n"; +      errs() << "Hello: " << fname << "\n";        return false;      } diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index a61263401618..5b91f3d20992 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -36,16 +36,18 @@  #include "llvm/Module.h"  #include "llvm/CallGraphSCCPass.h"  #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h"  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/CallGraph.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Support/CallSite.h" +#include "llvm/Support/Compiler.h"  #include "llvm/Support/CFG.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/StringExtras.h" -#include "llvm/Support/Compiler.h"  #include <set>  using namespace llvm; @@ -60,11 +62,10 @@ namespace {    struct VISIBILITY_HIDDEN ArgPromotion : public CallGraphSCCPass {      virtual void getAnalysisUsage(AnalysisUsage &AU) const {        AU.addRequired<AliasAnalysis>(); -      
AU.addRequired<TargetData>();        CallGraphSCCPass::getAnalysisUsage(AU);      } -    virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC); +    virtual bool runOnSCC(std::vector<CallGraphNode *> &SCC);      static char ID; // Pass identification, replacement for typeid      explicit ArgPromotion(unsigned maxElements = 3)        : CallGraphSCCPass(&ID), maxElements(maxElements) {} @@ -73,11 +74,11 @@ namespace {      typedef std::vector<uint64_t> IndicesVector;    private: -    bool PromoteArguments(CallGraphNode *CGN); +    CallGraphNode *PromoteArguments(CallGraphNode *CGN);      bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; -    Function *DoPromotion(Function *F, -                          SmallPtrSet<Argument*, 8> &ArgsToPromote, -                          SmallPtrSet<Argument*, 8> &ByValArgsToTransform); +    CallGraphNode *DoPromotion(Function *F, +                               SmallPtrSet<Argument*, 8> &ArgsToPromote, +                               SmallPtrSet<Argument*, 8> &ByValArgsToTransform);      /// The maximum number of elements to expand, or 0 for unlimited.      unsigned maxElements;    }; @@ -91,14 +92,17 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {    return new ArgPromotion(maxElements);  } -bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) { +bool ArgPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {    bool Changed = false, LocalChange;    do {  // Iterate until we stop promoting from this SCC.      LocalChange = false;      // Attempt to promote arguments from all functions in this SCC.      for (unsigned i = 0, e = SCC.size(); i != e; ++i) -      LocalChange |= PromoteArguments(SCC[i]); +      if (CallGraphNode *CGN = PromoteArguments(SCC[i])) { +        LocalChange = true; +        SCC[i] = CGN; +      }      Changed |= LocalChange;               // Remember that we changed something.    
} while (LocalChange); @@ -110,11 +114,11 @@ bool ArgPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) {  /// example, all callers are direct).  If safe to promote some arguments, it  /// calls the DoPromotion method.  /// -bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { +CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {    Function *F = CGN->getFunction();    // Make sure that it is local to this module. -  if (!F || !F->hasLocalLinkage()) return false; +  if (!F || !F->hasLocalLinkage()) return 0;    // First check: see if there are any pointer arguments!  If not, quick exit.    SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs; @@ -123,12 +127,12 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {         I != E; ++I, ++ArgNo)      if (isa<PointerType>(I->getType()))        PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo)); -  if (PointerArgs.empty()) return false; +  if (PointerArgs.empty()) return 0;    // Second check: make sure that all callers are direct callers.  We can't    // transform functions that have indirect callers.    if (F->hasAddressTaken()) -    return false; +    return 0;    // Check to see which arguments are promotable.  If an argument is promotable,    // add it to ArgsToPromote. 
@@ -144,9 +148,9 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {        const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();        if (const StructType *STy = dyn_cast<StructType>(AgTy)) {          if (maxElements > 0 && STy->getNumElements() > maxElements) { -          DOUT << "argpromotion disable promoting argument '" -               << PtrArg->getName() << "' because it would require adding more " -               << "than " << maxElements << " arguments to the function.\n"; +          DEBUG(errs() << "argpromotion disable promoting argument '" +                << PtrArg->getName() << "' because it would require adding more" +                << " than " << maxElements << " arguments to the function.\n");          } else {            // If all the elements are single-value types, we can promote it.            bool AllSimple = true; @@ -173,13 +177,10 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) {    }    // No promotable pointer arguments. -  if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return false; - -  Function *NewF = DoPromotion(F, ArgsToPromote, ByValArgsToTransform); +  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())  +    return 0; -  // Update the call graph to know that the function has been transformed. -  getAnalysis<CallGraph>().changeFunction(F, NewF); -  return true; +  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);  }  /// IsAlwaysValidPointer - Return true if the specified pointer is always legal @@ -409,9 +410,9 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {      // to do.      
if (ToPromote.find(Operands) == ToPromote.end()) {        if (maxElements > 0 && ToPromote.size() == maxElements) { -        DOUT << "argpromotion not promoting argument '" -             << Arg->getName() << "' because it would require adding more " -             << "than " << maxElements << " arguments to the function.\n"; +        DEBUG(errs() << "argpromotion not promoting argument '" +              << Arg->getName() << "' because it would require adding more " +              << "than " << maxElements << " arguments to the function.\n");          // We limit aggregate promotion to only promoting up to a fixed number          // of elements of the aggregate.          return false; @@ -432,7 +433,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {    SmallPtrSet<BasicBlock*, 16> TranspBlocks;    AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); -  TargetData &TD = getAnalysis<TargetData>(); +  TargetData *TD = getAnalysisIfAvailable<TargetData>(); +  if (!TD) return false; // Without TargetData, assume the worst.    for (unsigned i = 0, e = Loads.size(); i != e; ++i) {      // Check to see if the load is invalidated from the start of the block to @@ -442,7 +444,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {      const PointerType *LoadTy =        cast<PointerType>(Load->getPointerOperand()->getType()); -    unsigned LoadSize = (unsigned)TD.getTypeStoreSize(LoadTy->getElementType()); +    unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());      if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))        return false;  // Pointer is invalidated! @@ -467,8 +469,8 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {  /// DoPromotion - This method actually performs the promotion of the specified  /// arguments, and returns the new function.  At this point, we know that it's  /// safe to do so. 
-Function *ArgPromotion::DoPromotion(Function *F, -                                    SmallPtrSet<Argument*, 8> &ArgsToPromote, +CallGraphNode *ArgPromotion::DoPromotion(Function *F, +                               SmallPtrSet<Argument*, 8> &ArgsToPromote,                                SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {    // Start by computing a new prototype for the function, which is the same as @@ -581,19 +583,24 @@ Function *ArgPromotion::DoPromotion(Function *F,    bool ExtraArgHack = false;    if (Params.empty() && FTy->isVarArg()) {      ExtraArgHack = true; -    Params.push_back(Type::Int32Ty); +    Params.push_back(Type::getInt32Ty(F->getContext()));    }    // Construct the new function type using the new arguments.    FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); -  // Create the new function body and insert it into the module... +  // Create the new function body and insert it into the module.    Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());    NF->copyAttributesFrom(F); +   +  DEBUG(errs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n" +        << "From: " << *F); +      // Recompute the parameter attributes list based on the new arguments for    // the function. -  NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); +  NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), +                                     AttributesVec.end()));    AttributesVec.clear();    F->getParent()->getFunctionList().insert(F, NF); @@ -606,6 +613,10 @@ Function *ArgPromotion::DoPromotion(Function *F,    // Get the callgraph information that we need to update to reflect our    // changes.    CallGraph &CG = getAnalysis<CallGraph>(); +   +  // Get a new callgraph node for NF. +  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); +      // Loop over all of the callers of the function, transforming the call sites    // to pass in the loaded pointers. 
@@ -636,9 +647,10 @@ Function *ArgPromotion::DoPromotion(Function *F,          // Emit a GEP and load for each element of the struct.          const Type *AgTy = cast<PointerType>(I->getType())->getElementType();          const StructType *STy = cast<StructType>(AgTy); -        Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 }; +        Value *Idxs[2] = { +              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { -          Idxs[1] = ConstantInt::get(Type::Int32Ty, i); +          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);            Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,                                                   (*AI)->getName()+"."+utostr(i),                                                   Call); @@ -662,7 +674,9 @@ Function *ArgPromotion::DoPromotion(Function *F,                   IE = SI->end(); II != IE; ++II) {                // Use i32 to index structs, and i64 for others (pointers/arrays).                // This satisfies GEP constraints. -              const Type *IdxTy = (isa<StructType>(ElTy) ? Type::Int32Ty : Type::Int64Ty); +              const Type *IdxTy = (isa<StructType>(ElTy) ? 
+                    Type::getInt32Ty(F->getContext()) :  +                    Type::getInt64Ty(F->getContext()));                Ops.push_back(ConstantInt::get(IdxTy, *II));                // Keep track of the type we're currently indexing                ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); @@ -679,7 +693,7 @@ Function *ArgPromotion::DoPromotion(Function *F,        }      if (ExtraArgHack) -      Args.push_back(Constant::getNullValue(Type::Int32Ty)); +      Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));      // Push any varargs arguments on the list      for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { @@ -715,7 +729,8 @@ Function *ArgPromotion::DoPromotion(Function *F,      AA.replaceWithNewValue(Call, New);      // Update the callgraph to know that the callsite has been transformed. -    CG[Call->getParent()->getParent()]->replaceCallSite(Call, New); +    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; +    CalleeNode->replaceCallEdge(Call, New, NF_CGN);      if (!Call->use_empty()) {        Call->replaceAllUsesWith(New); @@ -756,14 +771,16 @@ Function *ArgPromotion::DoPromotion(Function *F,        const Type *AgTy = cast<PointerType>(I->getType())->getElementType();        Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);        const StructType *STy = cast<StructType>(AgTy); -      Value *Idxs[2] = { ConstantInt::get(Type::Int32Ty, 0), 0 }; +      Value *Idxs[2] = { +            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { -        Idxs[1] = ConstantInt::get(Type::Int32Ty, i); -        std::string Name = TheAlloca->getName()+"."+utostr(i); -        Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, -                                               Name, InsertPt); -        I2->setName(I->getName()+"."+utostr(i)); +        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); +        
Value *Idx =  +          GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, +                                    TheAlloca->getName()+"."+Twine(i),  +                                    InsertPt); +        I2->setName(I->getName()+"."+Twine(i));          new StoreInst(I2++, Idx, InsertPt);        } @@ -792,8 +809,8 @@ Function *ArgPromotion::DoPromotion(Function *F,          LI->replaceAllUsesWith(I2);          AA.replaceWithNewValue(LI, I2);          LI->eraseFromParent(); -        DOUT << "*** Promoted load of argument '" << I->getName() -             << "' in function '" << F->getName() << "'\n"; +        DEBUG(errs() << "*** Promoted load of argument '" << I->getName() +              << "' in function '" << F->getName() << "'\n");        } else {          GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());          IndicesVector Operands; @@ -819,8 +836,8 @@ Function *ArgPromotion::DoPromotion(Function *F,          NewName += ".val";          TheArg->setName(NewName); -        DOUT << "*** Promoted agg argument '" << TheArg->getName() -             << "' of function '" << NF->getName() << "'\n"; +        DEBUG(errs() << "*** Promoted agg argument '" << TheArg->getName() +              << "' of function '" << NF->getName() << "'\n");          // All of the uses must be load instructions.  Replace them all with          // the argument specified by ArgNo. @@ -842,13 +859,18 @@ Function *ArgPromotion::DoPromotion(Function *F,    // Notify the alias analysis implementation that we inserted a new argument.    if (ExtraArgHack) -    AA.copyValue(Constant::getNullValue(Type::Int32Ty), NF->arg_begin()); +    AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())),  +                 NF->arg_begin());    // Tell the alias analysis that the old function is about to disappear.    AA.replaceWithNewValue(F, NF); +   +  NF_CGN->stealCalledFunctionsFrom(CG[F]); +      // Now that the old function is dead, delete it. 
-  F->eraseFromParent(); -  return NF; +  delete CG.removeFunctionFromModule(F); +   +  return NF_CGN;  } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 1438b4879d2b..ec0f1e193ad6 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -1,18 +1,19 @@  add_llvm_library(LLVMipo -  FunctionAttrs.cpp    ArgumentPromotion.cpp    ConstantMerge.cpp    DeadArgumentElimination.cpp    DeadTypeElimination.cpp    ExtractGV.cpp +  FunctionAttrs.cpp    GlobalDCE.cpp    GlobalOpt.cpp +  IPConstantPropagation.cpp +  IPO.cpp    IndMemRemoval.cpp    InlineAlways.cpp -  Inliner.cpp    InlineSimple.cpp +  Inliner.cpp    Internalize.cpp -  IPConstantPropagation.cpp    LoopExtractor.cpp    LowerSetJmp.cpp    MergeFunctions.cpp diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 237e6db1d335..c1a1045005b7 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -78,7 +78,7 @@ bool ConstantMerge::runOnModule(Module &M) {        }        // Only process constants with initializers. -      if (GV->isConstant() && GV->hasInitializer()) { +      if (GV->isConstant() && GV->hasDefinitiveInitializer()) {          Constant *Init = GV->getInitializer();          // Check to see if the initializer is already known. 
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index e480dadca891..79a32f02aace 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -24,10 +24,12 @@  #include "llvm/DerivedTypes.h"  #include "llvm/Instructions.h"  #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Support/CallSite.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/StringExtras.h" @@ -72,7 +74,7 @@ namespace {        std::string getDescription() const {          return std::string((IsArg ? "Argument #" : "Return value #"))  -               + utostr(Idx) + " of function " + F->getName(); +               + utostr(Idx) + " of function " + F->getNameStr();        }      }; @@ -195,8 +197,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {    // Start by computing a new prototype for the function, which is the same as    // the old function, but doesn't have isVarArg set.    const FunctionType *FTy = Fn.getFunctionType(); +      std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end()); -  FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); +  FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), +                                                Params, false);    unsigned NumArgs = Params.size();    // Create the new function body and insert it into the module... @@ -277,7 +281,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {  /// for void functions and 1 for functions not returning a struct. It returns  /// the number of struct elements for functions returning a struct.  
static unsigned NumRetVals(const Function *F) { -  if (F->getReturnType() == Type::VoidTy) +  if (F->getReturnType() == Type::getVoidTy(F->getContext()))      return 0;    else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))      return STy->getNumElements(); @@ -422,7 +426,7 @@ void DAE::SurveyFunction(Function &F) {      return;    } -  DOUT << "DAE - Inspecting callers for fn: " << F.getName() << "\n"; +  DEBUG(errs() << "DAE - Inspecting callers for fn: " << F.getName() << "\n");    // Keep track of the number of live retvals, so we can skip checks once all    // of them turn out to be live.    unsigned NumLiveRetVals = 0; @@ -485,7 +489,7 @@ void DAE::SurveyFunction(Function &F) {    for (unsigned i = 0; i != RetCount; ++i)      MarkValue(CreateRet(&F, i), RetValLiveness[i], MaybeLiveRetUses[i]); -  DOUT << "DAE - Inspecting args for fn: " << F.getName() << "\n"; +  DEBUG(errs() << "DAE - Inspecting args for fn: " << F.getName() << "\n");    // Now, check all of our arguments.    unsigned i = 0; @@ -527,7 +531,7 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L,  /// mark any values that are used as this function's parameters or by its return  /// values (according to Uses) live as well.  void DAE::MarkLive(const Function &F) { -    DOUT << "DAE - Intrinsically live fn: " << F.getName() << "\n"; +  DEBUG(errs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");      // Mark the function as live.      LiveFunctions.insert(&F);      // Mark all arguments as live. @@ -548,7 +552,7 @@ void DAE::MarkLive(const RetOrArg &RA) {    if (!LiveValues.insert(RA).second)      return; // We were already marked Live. 
-  DOUT << "DAE - Marking " << RA.getDescription() << " live\n"; +  DEBUG(errs() << "DAE - Marking " << RA.getDescription() << " live\n");    PropagateLiveness(RA);  } @@ -596,11 +600,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {    const Type *RetTy = FTy->getReturnType();    const Type *NRetTy = NULL;    unsigned RetCount = NumRetVals(F); +      // -1 means unused, other numbers are the new index    SmallVector<int, 5> NewRetIdxs(RetCount, -1);    std::vector<const Type*> RetTypes; -  if (RetTy == Type::VoidTy) { -    NRetTy = Type::VoidTy; +  if (RetTy == Type::getVoidTy(F->getContext())) { +    NRetTy = Type::getVoidTy(F->getContext());    } else {      const StructType *STy = dyn_cast<StructType>(RetTy);      if (STy) @@ -612,8 +617,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {            NewRetIdxs[i] = RetTypes.size() - 1;          } else {            ++NumRetValsEliminated; -          DOUT << "DAE - Removing return value " << i << " from " -               << F->getNameStart() << "\n"; +          DEBUG(errs() << "DAE - Removing return value " << i << " from " +                << F->getName() << "\n");          }        }      else @@ -622,8 +627,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {          RetTypes.push_back(RetTy);          NewRetIdxs[0] = 0;        } else { -        DOUT << "DAE - Removing return value from " << F->getNameStart() -             << "\n"; +        DEBUG(errs() << "DAE - Removing return value from " << F->getName() +              << "\n");          ++NumRetValsEliminated;        }      if (RetTypes.size() > 1) @@ -633,14 +638,14 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {        // something and {} into void.        // Make the new struct packed if we used to return a packed struct        // already. 
-      NRetTy = StructType::get(RetTypes, STy->isPacked()); +      NRetTy = StructType::get(STy->getContext(), RetTypes, STy->isPacked());      else if (RetTypes.size() == 1)        // One return type? Just a simple value then, but only if we didn't use to        // return a struct with that simple value before.        NRetTy = RetTypes.front();      else if (RetTypes.size() == 0)        // No return types? Make it void, but only if we didn't use to return {}. -      NRetTy = Type::VoidTy; +      NRetTy = Type::getVoidTy(F->getContext());    }    assert(NRetTy && "No new return type found?"); @@ -649,7 +654,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {    // values. Otherwise, ensure that we don't have any conflicting attributes    // here. Currently, this should not be possible, but special handling might be    // required when new return value attributes are added. -  if (NRetTy == Type::VoidTy) +  if (NRetTy == Type::getVoidTy(F->getContext()))      RAttrs &= ~Attribute::typeIncompatible(NRetTy);    else      assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0  @@ -677,8 +682,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {          AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));      } else {        ++NumArgumentsEliminated; -      DOUT << "DAE - Removing argument " << i << " (" << I->getNameStart() -           << ") from " << F->getNameStart() << "\n"; +      DEBUG(errs() << "DAE - Removing argument " << i << " (" << I->getName() +            << ") from " << F->getName() << "\n");      }    } @@ -697,11 +702,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {    bool ExtraArgHack = false;    if (Params.empty() && FTy->isVarArg() && FTy->getNumParams() != 0) {      ExtraArgHack = true; -    Params.push_back(Type::Int32Ty); +    Params.push_back(Type::getInt32Ty(F->getContext()));    }    // Create the new function type based on the recomputed parameters. 
-  FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg()); +  FunctionType *NFTy = FunctionType::get(NRetTy, Params, +                                                FTy->isVarArg());    // No change?    if (NFTy == FTy) @@ -750,7 +756,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {        }      if (ExtraArgHack) -      Args.push_back(UndefValue::get(Type::Int32Ty)); +      Args.push_back(UndefValue::get(Type::getInt32Ty(F->getContext())));      // Push any varargs arguments on the list. Don't forget their attributes.      for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) { @@ -786,7 +792,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {          // Return type not changed? Just replace users then.          Call->replaceAllUsesWith(New);          New->takeName(Call); -      } else if (New->getType() == Type::VoidTy) { +      } else if (New->getType() == Type::getVoidTy(F->getContext())) {          // Our return value has uses, but they will get removed later on.          // Replace by null for now.          Call->replaceAllUsesWith(Constant::getNullValue(Call->getType())); @@ -806,7 +812,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {          // extract/insertvalue chaining and let instcombine clean that up.          
//          // Start out building up our return value from undef -        Value *RetVal = llvm::UndefValue::get(RetTy); +        Value *RetVal = UndefValue::get(RetTy);          for (unsigned i = 0; i != RetCount; ++i)            if (NewRetIdxs[i] != -1) {              Value *V; @@ -862,7 +868,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {        if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {          Value *RetVal; -        if (NFTy->getReturnType() == Type::VoidTy) { +        if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) {            RetVal = 0;          } else {            assert (isa<StructType>(RetTy)); @@ -873,7 +879,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {            // clean that up.            Value *OldRet = RI->getOperand(0);            // Start out building up our return value from undef -          RetVal = llvm::UndefValue::get(NRetTy); +          RetVal = UndefValue::get(NRetTy);            for (unsigned i = 0; i != RetCount; ++i)              if (NewRetIdxs[i] != -1) {                ExtractValueInst *EV = ExtractValueInst::Create(OldRet, i, @@ -893,7 +899,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {          }          // Replace the return instruction with one returning the new return          // value (possibly 0 if we became void). -        ReturnInst::Create(RetVal, RI); +        ReturnInst::Create(F->getContext(), RetVal, RI);          BB->getInstList().erase(RI);        } @@ -910,7 +916,7 @@ bool DAE::runOnModule(Module &M) {    // removed.  We can do this if they never call va_start.  This loop cannot be    // fused with the next loop, because deleting a function invalidates    // information computed while surveying other functions. 
-  DOUT << "DAE - Deleting dead varargs\n"; +  DEBUG(errs() << "DAE - Deleting dead varargs\n");    for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {      Function &F = *I++;      if (F.getFunctionType()->isVarArg()) @@ -921,7 +927,7 @@ bool DAE::runOnModule(Module &M) {    // We assume all arguments are dead unless proven otherwise (allowing us to    // determine that dead arguments passed into recursive functions are dead).    // -  DOUT << "DAE - Determining liveness\n"; +  DEBUG(errs() << "DAE - Determining liveness\n");    for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)      SurveyFunction(*I); diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 0c529d239d98..191100c2e241 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -12,6 +12,7 @@  //===----------------------------------------------------------------------===//  #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Constants.h" @@ -43,6 +44,7 @@ namespace {          return false;  // Nothing to extract        } +              if (deleteStuff)          return deleteGV();        M.setModuleInlineAsm(""); @@ -99,7 +101,8 @@ namespace {        // by putting them in the used array        {          std::vector<Constant *> AUGs; -        const Type *SBP= PointerType::getUnqual(Type::Int8Ty); +        const Type *SBP= +              Type::getInt8PtrTy(M.getContext());          for (std::vector<GlobalValue*>::iterator GI = Named.begin(),                  GE = Named.end(); GI != GE; ++GI) {            (*GI)->setLinkage(GlobalValue::ExternalLinkage); @@ -107,9 +110,9 @@ namespace {          }          ArrayType *AT = ArrayType::get(SBP, AUGs.size());          Constant *Init = ConstantArray::get(AT, AUGs); -        GlobalValue *gv = new GlobalVariable(AT, false,  +        GlobalValue *gv = new GlobalVariable(M, AT, false,                         
                       GlobalValue::AppendingLinkage,  -                                             Init, "llvm.used", &M); +                                             Init, "llvm.used");          gv->setSection("llvm.metadata");        } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index e8315247b23c..7edaa7fbef5e 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -26,6 +26,7 @@  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/CallGraph.h"  #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/MallocHelper.h"  #include "llvm/ADT/SmallSet.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/UniqueVector.h" @@ -44,7 +45,7 @@ namespace {      FunctionAttrs() : CallGraphSCCPass(&ID) {}      // runOnSCC - Analyze the SCC, performing the transformation if possible. -    bool runOnSCC(const std::vector<CallGraphNode *> &SCC); +    bool runOnSCC(std::vector<CallGraphNode *> &SCC);      // AddReadAttrs - Deduce readonly/readnone attributes for the SCC.      bool AddReadAttrs(const std::vector<CallGraphNode *> &SCC); @@ -54,7 +55,7 @@ namespace {      // IsFunctionMallocLike - Does this function allocate new memory?      bool IsFunctionMallocLike(Function *F, -                              SmallPtrSet<CallGraphNode*, 8> &) const; +                              SmallPtrSet<Function*, 8> &) const;      // AddNoAliasAttrs - Deduce noalias attributes for the SCC.      bool AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC); @@ -93,13 +94,12 @@ bool FunctionAttrs::PointsToLocalMemory(Value *V) {  /// AddReadAttrs - Deduce readonly/readnone attributes for the SCC.  bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) { -  SmallPtrSet<CallGraphNode*, 8> SCCNodes; -  CallGraph &CG = getAnalysis<CallGraph>(); +  SmallPtrSet<Function*, 8> SCCNodes;    // Fill SCCNodes with the elements of the SCC.  
Used for quickly    // looking up whether a given CallGraphNode is in this SCC.    for (unsigned i = 0, e = SCC.size(); i != e; ++i) -    SCCNodes.insert(SCC[i]); +    SCCNodes.insert(SCC[i]->getFunction());    // Check if any of the functions in the SCC read or write memory.  If they    // write memory then they can't be marked readnone or readonly. @@ -133,9 +133,9 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {        // Some instructions can be ignored even if they read or write memory.        // Detect these now, skipping to the next instruction if one is found.        CallSite CS = CallSite::get(I); -      if (CS.getInstruction()) { +      if (CS.getInstruction() && CS.getCalledFunction()) {          // Ignore calls to functions in the same SCC. -        if (SCCNodes.count(CG[CS.getCalledFunction()])) +        if (SCCNodes.count(CS.getCalledFunction()))            continue;        } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {          // Ignore loads from local memory. @@ -154,7 +154,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {          return false;        if (isa<MallocInst>(I)) -        // MallocInst claims not to write memory!  PR3754. +        // malloc claims not to write memory!  PR3754.          return false;        // If this instruction may read memory, remember that. @@ -226,9 +226,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) {  /// IsFunctionMallocLike - A function is malloc-like if it returns either null  /// or a pointer that doesn't alias any other pointer visible to the caller.  
bool FunctionAttrs::IsFunctionMallocLike(Function *F, -                              SmallPtrSet<CallGraphNode*, 8> &SCCNodes) const { -  CallGraph &CG = getAnalysis<CallGraph>(); - +                              SmallPtrSet<Function*, 8> &SCCNodes) const {    UniqueVector<Value *> FlowsToReturn;    for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I)      if (ReturnInst *Ret = dyn_cast<ReturnInst>(I->getTerminator())) @@ -250,32 +248,36 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,      if (Instruction *RVI = dyn_cast<Instruction>(RetVal))        switch (RVI->getOpcode()) {          // Extend the analysis by looking upwards. -        case Instruction::GetElementPtr:          case Instruction::BitCast: +        case Instruction::GetElementPtr:            FlowsToReturn.insert(RVI->getOperand(0));            continue;          case Instruction::Select: {            SelectInst *SI = cast<SelectInst>(RVI);            FlowsToReturn.insert(SI->getTrueValue());            FlowsToReturn.insert(SI->getFalseValue()); -        } continue; +          continue; +        }          case Instruction::PHI: {            PHINode *PN = cast<PHINode>(RVI);            for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)              FlowsToReturn.insert(PN->getIncomingValue(i)); -        } continue; +          continue; +        }          // Check whether the pointer came from an allocation.          
case Instruction::Alloca:          case Instruction::Malloc:            break;          case Instruction::Call: +          if (isMalloc(RVI)) +            break;          case Instruction::Invoke: {            CallSite CS(RVI);            if (CS.paramHasAttr(0, Attribute::NoAlias))              break;            if (CS.getCalledFunction() && -              SCCNodes.count(CG[CS.getCalledFunction()])) +              SCCNodes.count(CS.getCalledFunction()))              break;          } // fall-through          default: @@ -291,12 +293,12 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F,  /// AddNoAliasAttrs - Deduce noalias attributes for the SCC.  bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) { -  SmallPtrSet<CallGraphNode*, 8> SCCNodes; +  SmallPtrSet<Function*, 8> SCCNodes;    // Fill SCCNodes with the elements of the SCC.  Used for quickly    // looking up whether a given CallGraphNode is in this SCC.    for (unsigned i = 0, e = SCC.size(); i != e; ++i) -    SCCNodes.insert(SCC[i]); +    SCCNodes.insert(SCC[i]->getFunction());    // Check each function in turn, determining which functions return noalias    // pointers. @@ -339,7 +341,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {    return MadeChange;  } -bool FunctionAttrs::runOnSCC(const std::vector<CallGraphNode *> &SCC) { +bool FunctionAttrs::runOnSCC(std::vector<CallGraphNode *> &SCC) {    bool Changed = AddReadAttrs(SCC);    Changed |= AddNoCaptureAttrs(SCC);    Changed |= AddNoAliasAttrs(SCC); diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 9c652b996aeb..09f9e7c4f68a 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -58,6 +58,7 @@ ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }  bool GlobalDCE::runOnModule(Module &M) {    bool Changed = false; +      // Loop over the module, adding globals which are obviously necessary.    
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {      Changed |= RemoveUnusedGlobalValue(*I); @@ -147,6 +148,9 @@ bool GlobalDCE::runOnModule(Module &M) {    // Make sure that all memory is released    AliveGlobals.clear(); + +  // Remove dead metadata. +  Changed |= M.getContext().RemoveDeadMetadata();    return Changed;  } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 7fe097c7c576..a44386e6c15f 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -20,20 +20,23 @@  #include "llvm/DerivedTypes.h"  #include "llvm/Instructions.h"  #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MallocHelper.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Support/CallSite.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/GetElementPtrTypeIterator.h"  #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h"  #include "llvm/ADT/STLExtras.h"  #include <algorithm>  using namespace llvm; @@ -56,7 +59,6 @@ STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");  namespace {    struct VISIBILITY_HIDDEN GlobalOpt : public ModulePass {      virtual void getAnalysisUsage(AnalysisUsage &AU) const { -      AU.addRequired<TargetData>();      }      static char ID; // Pass identification, replacement for typeid      GlobalOpt() : ModulePass(&ID) {} @@ -244,7 +246,8 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,    return false;  } -static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) { +static Constant *getAggregateConstantElement(Constant *Agg, 
Constant *Idx, +                                             LLVMContext &Context) {    ConstantInt *CI = dyn_cast<ConstantInt>(Idx);    if (!CI) return 0;    unsigned IdxV = CI->getZExtValue(); @@ -280,7 +283,8 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {  /// users of the global, cleaning up the obvious ones.  This is largely just a  /// quick scan over the use list to clean up the easy and obvious cruft.  This  /// returns true if it made a change. -static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { +static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, +                                       LLVMContext &Context) {    bool Changed = false;    for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {      User *U = *UI++; @@ -301,11 +305,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {          Constant *SubInit = 0;          if (Init)            SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); -        Changed |= CleanupConstantGlobalUsers(CE, SubInit); +        Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context);        } else if (CE->getOpcode() == Instruction::BitCast &&                    isa<PointerType>(CE->getType())) {          // Pointer cast, delete any stores and memsets to the global. 
-        Changed |= CleanupConstantGlobalUsers(CE, 0); +        Changed |= CleanupConstantGlobalUsers(CE, 0, Context);        }        if (CE->use_empty()) { @@ -319,11 +323,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {        Constant *SubInit = 0;        if (!isa<ConstantExpr>(GEP->getOperand(0))) {          ConstantExpr *CE =  -          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP)); +          dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, Context));          if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)            SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);        } -      Changed |= CleanupConstantGlobalUsers(GEP, SubInit); +      Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context);        if (GEP->use_empty()) {          GEP->eraseFromParent(); @@ -341,7 +345,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {        if (SafeToDestroyConstant(C)) {          C->destroyConstant();          // This could have invalidated UI, start over from scratch. -        CleanupConstantGlobalUsers(V, Init); +        CleanupConstantGlobalUsers(V, Init, Context);          return true;        }      } @@ -423,13 +427,18 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {      // Scalar replacing *just* the outer index of the array is probably not      // going to be a win anyway, so just give up.      for (++GEPI; // Skip array index. 
-         GEPI != E && (isa<ArrayType>(*GEPI) || isa<VectorType>(*GEPI)); +         GEPI != E;           ++GEPI) {        uint64_t NumElements;        if (const ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))          NumElements = SubArrayTy->getNumElements(); -      else -        NumElements = cast<VectorType>(*GEPI)->getNumElements(); +      else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI)) +        NumElements = SubVectorTy->getNumElements(); +      else { +        assert(isa<StructType>(*GEPI) && +               "Indexed GEP type is not array, vector, or struct!"); +        continue; +      }        ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());        if (!IdxVal || IdxVal->getZExtValue() >= NumElements) @@ -461,7 +470,8 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {  /// behavior of the program in a more fine-grained way.  We have determined that  /// this transformation is safe already.  We return the first global variable we  /// insert so that the caller can reprocess it. -static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) { +static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD, +                                 LLVMContext &Context) {    // Make sure this global only has simple uses that we can SRA.    
if (!GlobalUsersSafeToSRA(GV))      return 0; @@ -483,14 +493,15 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {      const StructLayout &Layout = *TD.getStructLayout(STy);      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {        Constant *In = getAggregateConstantElement(Init, -                                            ConstantInt::get(Type::Int32Ty, i)); +                                ConstantInt::get(Type::getInt32Ty(Context), i), +                                    Context);        assert(In && "Couldn't get element of initializer?"); -      GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false, +      GlobalVariable *NGV = new GlobalVariable(Context, +                                               STy->getElementType(i), false,                                                 GlobalVariable::InternalLinkage, -                                               In, GV->getName()+"."+utostr(i), -                                               (Module *)NULL, +                                               In, GV->getName()+"."+Twine(i),                                                 GV->isThreadLocal(), -                                               GV->getType()->getAddressSpace()); +                                              GV->getType()->getAddressSpace());        Globals.insert(GV, NGV);        NewGlobals.push_back(NGV); @@ -517,15 +528,16 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {      unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());      for (unsigned i = 0, e = NumElements; i != e; ++i) {        Constant *In = getAggregateConstantElement(Init, -                                            ConstantInt::get(Type::Int32Ty, i)); +                                ConstantInt::get(Type::getInt32Ty(Context), i), +                                    Context);        assert(In && "Couldn't get element of initializer?"); -      GlobalVariable 
*NGV = new GlobalVariable(STy->getElementType(), false, +      GlobalVariable *NGV = new GlobalVariable(Context, +                                               STy->getElementType(), false,                                                 GlobalVariable::InternalLinkage, -                                               In, GV->getName()+"."+utostr(i), -                                               (Module *)NULL, +                                               In, GV->getName()+"."+Twine(i),                                                 GV->isThreadLocal(), -                                               GV->getType()->getAddressSpace()); +                                              GV->getType()->getAddressSpace());        Globals.insert(GV, NGV);        NewGlobals.push_back(NGV); @@ -541,9 +553,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {    if (NewGlobals.empty())      return 0; -  DOUT << "PERFORMING GLOBAL SRA ON: " << *GV; +  DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV); -  Constant *NullInt = Constant::getNullValue(Type::Int32Ty); +  Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context));    // Loop over all of the uses of the global, replacing the constantexpr geps,    // with smaller constantexpr geps or direct references. 
@@ -577,7 +589,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {          for (unsigned i = 3, e = GEPI->getNumOperands(); i != e; ++i)            Idxs.push_back(GEPI->getOperand(i));          NewPtr = GetElementPtrInst::Create(NewPtr, Idxs.begin(), Idxs.end(), -                                           GEPI->getName()+"."+utostr(Val), GEPI); +                                           GEPI->getName()+"."+Twine(Val),GEPI);        }      }      GEP->replaceAllUsesWith(NewPtr); @@ -667,7 +679,8 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) {    return true;  } -static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { +static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV, +                                           LLVMContext &Context) {    bool Changed = false;    for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {      Instruction *I = cast<Instruction>(*UI++); @@ -700,7 +713,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {      } else if (CastInst *CI = dyn_cast<CastInst>(I)) {        Changed |= OptimizeAwayTrappingUsesOfValue(CI,                                  ConstantExpr::getCast(CI->getOpcode(), -                                                      NewV, CI->getType())); +                                                NewV, CI->getType()), Context);        if (CI->use_empty()) {          Changed = true;          CI->eraseFromParent(); @@ -717,8 +730,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {            break;        if (Idxs.size() == GEPI->getNumOperands()-1)          Changed |= OptimizeAwayTrappingUsesOfValue(GEPI, -                                ConstantExpr::getGetElementPtr(NewV, &Idxs[0], -                                                               Idxs.size())); +                          ConstantExpr::getGetElementPtr(NewV, &Idxs[0], +                                       
                 Idxs.size()), Context);        if (GEPI->use_empty()) {          Changed = true;          GEPI->eraseFromParent(); @@ -734,7 +747,8 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {  /// value stored into it.  If there are uses of the loaded value that would trap  /// if the loaded value is dynamically null, then we know that they cannot be  /// reachable with a null optimize away the load. -static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) { +static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, +                                            LLVMContext &Context) {    bool Changed = false;    // Keep track of whether we are able to remove all the uses of the global @@ -745,7 +759,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {    for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){      User *GlobalUser = *GUI++;      if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) { -      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); +      Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context);        // If we were able to delete all uses of the loads        if (LI->use_empty()) {          LI->eraseFromParent(); @@ -768,15 +782,15 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {    }    if (Changed) { -    DOUT << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV; +    DEBUG(errs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV);      ++NumGlobUses;    }    // If we nuked all of the loads, then none of the stores are needed either,    // nor is the global.    
if (AllNonStoreUsesGone) { -    DOUT << "  *** GLOBAL NOW DEAD!\n"; -    CleanupConstantGlobalUsers(GV, 0); +    DEBUG(errs() << "  *** GLOBAL NOW DEAD!\n"); +    CleanupConstantGlobalUsers(GV, 0, Context);      if (GV->use_empty()) {        GV->eraseFromParent();        ++NumDeleted; @@ -788,10 +802,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {  /// ConstantPropUsersOf - Walk the use list of V, constant folding all of the  /// instructions that are foldable. -static void ConstantPropUsersOf(Value *V) { +static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {    for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )      if (Instruction *I = dyn_cast<Instruction>(*UI++)) -      if (Constant *NewC = ConstantFoldInstruction(I)) { +      if (Constant *NewC = ConstantFoldInstruction(I, Context)) {          I->replaceAllUsesWith(NewC);          // Advance UI to the next non-I use to avoid invalidating it! @@ -808,8 +822,9 @@ static void ConstantPropUsersOf(Value *V) {  /// malloc, there is no reason to actually DO the malloc.  Instead, turn the  /// malloc into a global, and any loads of GV as uses of the new global.  
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, -                                                     MallocInst *MI) { -  DOUT << "PROMOTING MALLOC GLOBAL: " << *GV << "  MALLOC = " << *MI; +                                                     MallocInst *MI, +                                                     LLVMContext &Context) { +  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << "  MALLOC = " << *MI);    ConstantInt *NElements = cast<ConstantInt>(MI->getArraySize());    if (NElements->getZExtValue() != 1) { @@ -818,10 +833,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,      Type *NewTy = ArrayType::get(MI->getAllocatedType(),                                   NElements->getZExtValue());      MallocInst *NewMI = -      new MallocInst(NewTy, Constant::getNullValue(Type::Int32Ty), +      new MallocInst(NewTy, Constant::getNullValue(Type::getInt32Ty(Context)),                       MI->getAlignment(), MI->getName(), MI);      Value* Indices[2]; -    Indices[0] = Indices[1] = Constant::getNullValue(Type::Int32Ty); +    Indices[0] = Indices[1] = Constant::getNullValue(Type::getInt32Ty(Context));      Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,                                                NewMI->getName()+".el0", MI);      MI->replaceAllUsesWith(NewGEP); @@ -831,17 +846,17 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,    // Create the new global variable.  The contents of the malloc'd memory is    // undefined, so initialize with an undef value. +  // FIXME: This new global should have the alignment returned by malloc.  Code +  // could depend on malloc returning large alignment (on the mac, 16 bytes) but +  // this would only guarantee some lower alignment.    
Constant *Init = UndefValue::get(MI->getAllocatedType()); -  GlobalVariable *NewGV = new GlobalVariable(MI->getAllocatedType(), false, +  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),  +                                             MI->getAllocatedType(), false,                                               GlobalValue::InternalLinkage, Init,                                               GV->getName()+".body", -                                             (Module *)NULL, +                                             GV,                                               GV->isThreadLocal()); -  // FIXME: This new global should have the alignment returned by malloc.  Code -  // could depend on malloc returning large alignment (on the mac, 16 bytes) but -  // this would only guarantee some lower alignment. -  GV->getParent()->getGlobalList().insert(GV, NewGV); - +      // Anything that used the malloc now uses the global directly.    MI->replaceAllUsesWith(NewGV); @@ -853,9 +868,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,    // If there is a comparison against null, we will insert a global bool to    // keep track of whether the global was initialized yet or not.    GlobalVariable *InitBool = -    new GlobalVariable(Type::Int1Ty, false, GlobalValue::InternalLinkage, -                       ConstantInt::getFalse(), GV->getName()+".init", -                       (Module *)NULL, GV->isThreadLocal()); +    new GlobalVariable(Context, Type::getInt1Ty(Context), false, +                       GlobalValue::InternalLinkage, +                       ConstantInt::getFalse(Context), GV->getName()+".init", +                       GV->isThreadLocal());    bool InitBoolUsed = false;    // Loop over all uses of GV, processing them in turn. 
@@ -872,10 +888,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,            Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", CI);            InitBoolUsed = true;            switch (CI->getPredicate()) { -          default: assert(0 && "Unknown ICmp Predicate!"); +          default: llvm_unreachable("Unknown ICmp Predicate!");            case ICmpInst::ICMP_ULT:            case ICmpInst::ICMP_SLT: -            LV = ConstantInt::getFalse();   // X < null -> always false +            LV = ConstantInt::getFalse(Context);   // X < null -> always false              break;            case ICmpInst::ICMP_ULE:            case ICmpInst::ICMP_SLE: @@ -897,7 +913,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,      } else {        StoreInst *SI = cast<StoreInst>(GV->use_back());        // The global is initialized when the store to it occurs. -      new StoreInst(ConstantInt::getTrue(), InitBool, SI); +      new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);        SI->eraseFromParent();      } @@ -917,9 +933,141 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,    // To further other optimizations, loop over all users of NewGV and try to    // constant prop them.  This will promote GEP instructions with constant    // indices into GEP constant-exprs, which will allow global-opt to hack on it. -  ConstantPropUsersOf(NewGV); +  ConstantPropUsersOf(NewGV, Context);    if (RepValue != NewGV) -    ConstantPropUsersOf(RepValue); +    ConstantPropUsersOf(RepValue, Context); + +  return NewGV; +} + +/// OptimizeGlobalAddressOfMalloc - This function takes the specified global +/// variable, and transforms the program as if it always contained the result of +/// the specified malloc.  Because it is always the result of the specified +/// malloc, there is no reason to actually DO the malloc.  
Instead, turn the +/// malloc into a global, and any loads of GV as uses of the new global. +static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, +                                                     CallInst *CI, +                                                     BitCastInst *BCI, +                                                     LLVMContext &Context, +                                                     TargetData* TD) { +  const Type *IntPtrTy = TD->getIntPtrType(Context); +   +  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV << "  MALLOC = " << *CI); + +  ConstantInt *NElements = cast<ConstantInt>(getMallocArraySize(CI, +                                                                Context, TD)); +  if (NElements->getZExtValue() != 1) { +    // If we have an array allocation, transform it to a single element +    // allocation to make the code below simpler. +    Type *NewTy = ArrayType::get(getMallocAllocatedType(CI), +                                 NElements->getZExtValue()); +    Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy); +    Instruction* NewMI = cast<Instruction>(NewM); +    Value* Indices[2]; +    Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy); +    Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2, +                                              NewMI->getName()+".el0", CI); +    BCI->replaceAllUsesWith(NewGEP); +    BCI->eraseFromParent(); +    CI->eraseFromParent(); +    BCI = cast<BitCastInst>(NewMI); +    CI = extractMallocCallFromBitCast(NewMI); +  } + +  // Create the new global variable.  The contents of the malloc'd memory is +  // undefined, so initialize with an undef value. +  // FIXME: This new global should have the alignment returned by malloc.  Code +  // could depend on malloc returning large alignment (on the mac, 16 bytes) but +  // this would only guarantee some lower alignment. 
+  const Type *MAT = getMallocAllocatedType(CI); +  Constant *Init = UndefValue::get(MAT); +  GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),  +                                             MAT, false, +                                             GlobalValue::InternalLinkage, Init, +                                             GV->getName()+".body", +                                             GV, +                                             GV->isThreadLocal()); +   +  // Anything that used the malloc now uses the global directly. +  BCI->replaceAllUsesWith(NewGV); + +  Constant *RepValue = NewGV; +  if (NewGV->getType() != GV->getType()->getElementType()) +    RepValue = ConstantExpr::getBitCast(RepValue,  +                                        GV->getType()->getElementType()); + +  // If there is a comparison against null, we will insert a global bool to +  // keep track of whether the global was initialized yet or not. +  GlobalVariable *InitBool = +    new GlobalVariable(Context, Type::getInt1Ty(Context), false, +                       GlobalValue::InternalLinkage, +                       ConstantInt::getFalse(Context), GV->getName()+".init", +                       GV->isThreadLocal()); +  bool InitBoolUsed = false; + +  // Loop over all uses of GV, processing them in turn. +  std::vector<StoreInst*> Stores; +  while (!GV->use_empty()) +    if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) { +      while (!LI->use_empty()) { +        Use &LoadUse = LI->use_begin().getUse(); +        if (!isa<ICmpInst>(LoadUse.getUser())) +          LoadUse = RepValue; +        else { +          ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser()); +          // Replace the cmp X, 0 with a use of the bool value. 
+          Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI); +          InitBoolUsed = true; +          switch (ICI->getPredicate()) { +          default: llvm_unreachable("Unknown ICmp Predicate!"); +          case ICmpInst::ICMP_ULT: +          case ICmpInst::ICMP_SLT: +            LV = ConstantInt::getFalse(Context);   // X < null -> always false +            break; +          case ICmpInst::ICMP_ULE: +          case ICmpInst::ICMP_SLE: +          case ICmpInst::ICMP_EQ: +            LV = BinaryOperator::CreateNot(LV, "notinit", ICI); +            break; +          case ICmpInst::ICMP_NE: +          case ICmpInst::ICMP_UGE: +          case ICmpInst::ICMP_SGE: +          case ICmpInst::ICMP_UGT: +          case ICmpInst::ICMP_SGT: +            break;  // no change. +          } +          ICI->replaceAllUsesWith(LV); +          ICI->eraseFromParent(); +        } +      } +      LI->eraseFromParent(); +    } else { +      StoreInst *SI = cast<StoreInst>(GV->use_back()); +      // The global is initialized when the store to it occurs. +      new StoreInst(ConstantInt::getTrue(Context), InitBool, SI); +      SI->eraseFromParent(); +    } + +  // If the initialization boolean was used, insert it, otherwise delete it. +  if (!InitBoolUsed) { +    while (!InitBool->use_empty())  // Delete initializations +      cast<Instruction>(InitBool->use_back())->eraseFromParent(); +    delete InitBool; +  } else +    GV->getParent()->getGlobalList().insert(GV, InitBool); + + +  // Now the GV is dead, nuke it and the malloc. +  GV->eraseFromParent(); +  BCI->eraseFromParent(); +  CI->eraseFromParent(); + +  // To further other optimizations, loop over all users of NewGV and try to +  // constant prop them.  This will promote GEP instructions with constant +  // indices into GEP constant-exprs, which will allow global-opt to hack on it. 
+  ConstantPropUsersOf(NewGV, Context); +  if (RepValue != NewGV) +    ConstantPropUsersOf(RepValue, Context);    return NewGV;  } @@ -1071,7 +1219,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,  /// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from  /// GV are simple enough to perform HeapSRA, return true.  static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV, -                                                    MallocInst *MI) { +                                                    Instruction *StoredVal) {    SmallPtrSet<PHINode*, 32> LoadUsingPHIs;    SmallPtrSet<PHINode*, 32> LoadUsingPHIsPerLoad;    for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;  @@ -1095,7 +1243,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,        Value *InVal = PN->getIncomingValue(op);        // PHI of the stored value itself is ok. -      if (InVal == MI) continue; +      if (InVal == StoredVal) continue;        if (PHINode *InPN = dyn_cast<PHINode>(InVal)) {          // One of the PHIs in our set is (optimistically) ok. @@ -1121,7 +1269,8 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,  static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,                 DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, -                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { +                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, +                   LLVMContext &Context) {    std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];    if (FieldNo >= FieldVals.size()) @@ -1139,19 +1288,20 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,      // a new Load of the scalarized global.      
Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,                                             InsertedScalarizedValues, -                                           PHIsToRewrite), -                          LI->getName()+".f" + utostr(FieldNo), LI); +                                           PHIsToRewrite, Context), +                          LI->getName()+".f"+Twine(FieldNo), LI);    } else if (PHINode *PN = dyn_cast<PHINode>(V)) {      // PN's type is pointer to struct.  Make a new PHI of pointer to struct      // field.      const StructType *ST =         cast<StructType>(cast<PointerType>(PN->getType())->getElementType()); -    Result =PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), -                            PN->getName()+".f"+utostr(FieldNo), PN); +    Result = +     PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), +                     PN->getName()+".f"+Twine(FieldNo), PN);      PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));    } else { -    assert(0 && "Unknown usable value"); +    llvm_unreachable("Unknown usable value");      Result = 0;    } @@ -1162,18 +1312,20 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,  /// the load, rewrite the derived value to use the HeapSRoA'd load.  static void RewriteHeapSROALoadUser(Instruction *LoadUser,                DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, -                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { +                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, +                   LLVMContext &Context) {    // If this is a comparison against null, handle it.    if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {      assert(isa<ConstantPointerNull>(SCI->getOperand(1)));      // If we have a setcc of the loaded pointer, we can use a setcc of any      // field.      
Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0, -                                   InsertedScalarizedValues, PHIsToRewrite); +                                   InsertedScalarizedValues, PHIsToRewrite, +                                   Context); -    Value *New = new ICmpInst(SCI->getPredicate(), NPtr, -                              Constant::getNullValue(NPtr->getType()), -                              SCI->getName(), SCI); +    Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr, +                              Constant::getNullValue(NPtr->getType()),  +                              SCI->getName());      SCI->replaceAllUsesWith(New);      SCI->eraseFromParent();      return; @@ -1187,7 +1339,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,      // Load the pointer for this field.      unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();      Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo, -                                     InsertedScalarizedValues, PHIsToRewrite); +                                     InsertedScalarizedValues, PHIsToRewrite, +                                     Context);      // Create the new GEP idx vector.      SmallVector<Value*, 8> GEPIdx; @@ -1219,7 +1372,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,    // users.    for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {      Instruction *User = cast<Instruction>(*UI++); -    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); +    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, +                            Context);    }  } @@ -1229,11 +1383,13 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,  /// AllGlobalLoadUsesSimpleEnoughForHeapSRA.  
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,                  DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues, -                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) { +                   std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite, +                   LLVMContext &Context) {    for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();         UI != E; ) {      Instruction *User = cast<Instruction>(*UI++); -    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); +    RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite, +                            Context);    }    if (Load->use_empty()) { @@ -1244,8 +1400,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,  /// PerformHeapAllocSRoA - MI is an allocation of an array of structures.  Break  /// it up into multiple allocations of arrays of the fields. -static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){ -  DOUT << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *MI; +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI, +                                            LLVMContext &Context){ +  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *MI);    const StructType *STy = cast<StructType>(MI->getAllocatedType());    // There is guaranteed to be at least one use of the malloc (storing @@ -1264,14 +1421,15 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){      const Type *PFieldTy = PointerType::getUnqual(FieldTy);      GlobalVariable *NGV = -      new GlobalVariable(PFieldTy, false, GlobalValue::InternalLinkage, +      new GlobalVariable(*GV->getParent(), +                         PFieldTy, false, GlobalValue::InternalLinkage,                           Constant::getNullValue(PFieldTy), -                         GV->getName() + ".f" + utostr(FieldNo), GV, +                         GV->getName() 
+ ".f" + Twine(FieldNo), GV,                           GV->isThreadLocal());      FieldGlobals.push_back(NGV);      MallocInst *NMI = new MallocInst(FieldTy, MI->getArraySize(), -                                     MI->getName() + ".f" + utostr(FieldNo),MI); +                                     MI->getName() + ".f" + Twine(FieldNo), MI);      FieldMallocs.push_back(NMI);      new StoreInst(NMI, NGV, MI);    } @@ -1290,9 +1448,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){    //    }    Value *RunningOr = 0;    for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { -    Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, FieldMallocs[i], -                             Constant::getNullValue(FieldMallocs[i]->getType()), -                                  "isnull", MI); +    Value *Cond = new ICmpInst(MI, ICmpInst::ICMP_EQ, FieldMallocs[i], +                              Constant::getNullValue(FieldMallocs[i]->getType()), +                                  "isnull");      if (!RunningOr)        RunningOr = Cond;   // First seteq      else @@ -1305,7 +1463,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){    // Create the block to check the first condition.  Put all these blocks at the    // end of the function as they are unlikely to be executed. -  BasicBlock *NullPtrBlock = BasicBlock::Create("malloc_ret_null", +  BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",                                                  OrigBB->getParent());    // Remove the uncond branch from OrigBB to ContBB, turning it into a cond @@ -1317,11 +1475,13 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){    // pointer, because some may be null while others are not.    
for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) {      Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); -    Value *Cmp = new ICmpInst(ICmpInst::ICMP_NE, GVVal,  +    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,                                 Constant::getNullValue(GVVal->getType()), -                              "tmp", NullPtrBlock); -    BasicBlock *FreeBlock = BasicBlock::Create("free_it", OrigBB->getParent()); -    BasicBlock *NextBlock = BasicBlock::Create("next", OrigBB->getParent()); +                              "tmp"); +    BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",  +                                               OrigBB->getParent()); +    BasicBlock *NextBlock = BasicBlock::Create(Context, "next",  +                                               OrigBB->getParent());      BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock);      // Fill in FreeBlock. @@ -1353,7 +1513,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){      Instruction *User = cast<Instruction>(*UI++);      if (LoadInst *LI = dyn_cast<LoadInst>(User)) { -      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); +      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, +                                   Context);        continue;      } @@ -1384,7 +1545,192 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {        Value *InVal = PN->getIncomingValue(i);        InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, -                               PHIsToRewrite); +                               PHIsToRewrite, Context); +      FieldPN->addIncoming(InVal, PN->getIncomingBlock(i)); +    } +  } +   +  // Drop all inter-phi links and any loads that made it this far. 
+  for (DenseMap<Value*, std::vector<Value*> >::iterator +       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); +       I != E; ++I) { +    if (PHINode *PN = dyn_cast<PHINode>(I->first)) +      PN->dropAllReferences(); +    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) +      LI->dropAllReferences(); +  } +   +  // Delete all the phis and loads now that inter-references are dead. +  for (DenseMap<Value*, std::vector<Value*> >::iterator +       I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); +       I != E; ++I) { +    if (PHINode *PN = dyn_cast<PHINode>(I->first)) +      PN->eraseFromParent(); +    else if (LoadInst *LI = dyn_cast<LoadInst>(I->first)) +      LI->eraseFromParent(); +  } +   +  // The old global is now dead, remove it. +  GV->eraseFromParent(); + +  ++NumHeapSRA; +  return cast<GlobalVariable>(FieldGlobals[0]); +} + +/// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break +/// it up into multiple allocations of arrays of the fields. +static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, +                                            CallInst *CI, BitCastInst* BCI,  +                                            LLVMContext &Context, +                                            TargetData *TD){ +  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC CALL = " << *CI  +               << " BITCAST = " << *BCI << '\n'); +  const Type* MAT = getMallocAllocatedType(CI); +  const StructType *STy = cast<StructType>(MAT); + +  // There is guaranteed to be at least one use of the malloc (storing +  // it into GV).  If there are other uses, change them to be uses of +  // the global to simplify later code.  This also deletes the store +  // into GV. +  ReplaceUsesOfMallocWithGlobal(BCI, GV); +   +  // Okay, at this point, there are no users of the malloc.  Insert N +  // new mallocs at the same place as CI, and N globals. 
+  std::vector<Value*> FieldGlobals; +  std::vector<Value*> FieldMallocs; +   +  for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ +    const Type *FieldTy = STy->getElementType(FieldNo); +    const PointerType *PFieldTy = PointerType::getUnqual(FieldTy); +     +    GlobalVariable *NGV = +      new GlobalVariable(*GV->getParent(), +                         PFieldTy, false, GlobalValue::InternalLinkage, +                         Constant::getNullValue(PFieldTy), +                         GV->getName() + ".f" + Twine(FieldNo), GV, +                         GV->isThreadLocal()); +    FieldGlobals.push_back(NGV); +     +    Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), FieldTy, +                                        getMallocArraySize(CI, Context, TD), +                                        BCI->getName() + ".f" + Twine(FieldNo)); +    FieldMallocs.push_back(NMI); +    new StoreInst(NMI, NGV, BCI); +  } +   +  // The tricky aspect of this transformation is handling the case when malloc +  // fails.  In the original code, malloc failing would set the result pointer +  // of malloc to null.  In this case, some mallocs could succeed and others +  // could fail.  
As such, we emit code that looks like this: +  //    F0 = malloc(field0) +  //    F1 = malloc(field1) +  //    F2 = malloc(field2) +  //    if (F0 == 0 || F1 == 0 || F2 == 0) { +  //      if (F0) { free(F0); F0 = 0; } +  //      if (F1) { free(F1); F1 = 0; } +  //      if (F2) { free(F2); F2 = 0; } +  //    } +  Value *RunningOr = 0; +  for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { +    Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i], +                              Constant::getNullValue(FieldMallocs[i]->getType()), +                                  "isnull"); +    if (!RunningOr) +      RunningOr = Cond;   // First seteq +    else +      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI); +  } + +  // Split the basic block at the old malloc. +  BasicBlock *OrigBB = BCI->getParent(); +  BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont"); +   +  // Create the block to check the first condition.  Put all these blocks at the +  // end of the function as they are unlikely to be executed. +  BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null", +                                                OrigBB->getParent()); +   +  // Remove the uncond branch from OrigBB to ContBB, turning it into a cond +  // branch on RunningOr. +  OrigBB->getTerminator()->eraseFromParent(); +  BranchInst::Create(NullPtrBlock, ContBB, RunningOr, OrigBB); +   +  // Within the NullPtrBlock, we need to emit a comparison and branch for each +  // pointer, because some may be null while others are not. 
+  for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { +    Value *GVVal = new LoadInst(FieldGlobals[i], "tmp", NullPtrBlock); +    Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,  +                              Constant::getNullValue(GVVal->getType()), +                              "tmp"); +    BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it", +                                               OrigBB->getParent()); +    BasicBlock *NextBlock = BasicBlock::Create(Context, "next", +                                               OrigBB->getParent()); +    BranchInst::Create(FreeBlock, NextBlock, Cmp, NullPtrBlock); + +    // Fill in FreeBlock. +    new FreeInst(GVVal, FreeBlock); +    new StoreInst(Constant::getNullValue(GVVal->getType()), FieldGlobals[i], +                  FreeBlock); +    BranchInst::Create(NextBlock, FreeBlock); +     +    NullPtrBlock = NextBlock; +  } +   +  BranchInst::Create(ContBB, NullPtrBlock); +   +  // CI and BCI are no longer needed, remove them. +  BCI->eraseFromParent(); +  CI->eraseFromParent(); + +  /// InsertedScalarizedLoads - As we process loads, if we can't immediately +  /// update all uses of the load, keep track of what scalarized loads are +  /// inserted for a given load. +  DenseMap<Value*, std::vector<Value*> > InsertedScalarizedValues; +  InsertedScalarizedValues[GV] = FieldGlobals; +   +  std::vector<std::pair<PHINode*, unsigned> > PHIsToRewrite; +   +  // Okay, the malloc site is completely handled.  All of the uses of GV are now +  // loads, and all uses of those loads are simple.  Rewrite them to use loads +  // of the per-field globals instead. 
+  for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;) { +    Instruction *User = cast<Instruction>(*UI++); +     +    if (LoadInst *LI = dyn_cast<LoadInst>(User)) { +      RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite, +                                   Context); +      continue; +    } +     +    // Must be a store of null. +    StoreInst *SI = cast<StoreInst>(User); +    assert(isa<ConstantPointerNull>(SI->getOperand(0)) && +           "Unexpected heap-sra user!"); +     +    // Insert a store of null into each global. +    for (unsigned i = 0, e = FieldGlobals.size(); i != e; ++i) { +      const PointerType *PT = cast<PointerType>(FieldGlobals[i]->getType()); +      Constant *Null = Constant::getNullValue(PT->getElementType()); +      new StoreInst(Null, FieldGlobals[i], SI); +    } +    // Erase the original store. +    SI->eraseFromParent(); +  } + +  // While we have PHIs that are interesting to rewrite, do it. +  while (!PHIsToRewrite.empty()) { +    PHINode *PN = PHIsToRewrite.back().first; +    unsigned FieldNo = PHIsToRewrite.back().second; +    PHIsToRewrite.pop_back(); +    PHINode *FieldPN = cast<PHINode>(InsertedScalarizedValues[PN][FieldNo]); +    assert(FieldPN->getNumIncomingValues() == 0 &&"Already processed this phi"); + +    // Add all the incoming values.  This can materialize more phis. 
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { +      Value *InVal = PN->getIncomingValue(i); +      InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues, +                               PHIsToRewrite, Context);        FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));      }    } @@ -1422,7 +1768,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, MallocInst *MI){  static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,                                                 MallocInst *MI,                                                 Module::global_iterator &GVI, -                                               TargetData &TD) { +                                               TargetData *TD, +                                               LLVMContext &Context) {    // If this is a malloc of an abstract type, don't touch it.    if (!MI->getAllocatedType()->isSized())      return false; @@ -1456,9 +1803,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,      // Restrict this transformation to only working on small allocations      // (2048 bytes currently), as we don't want to introduce a 16M global or      // something. 
-    if (NElements->getZExtValue()* -        TD.getTypeAllocSize(MI->getAllocatedType()) < 2048) { -      GVI = OptimizeGlobalAddressOfMalloc(GV, MI); +    if (TD && +        NElements->getZExtValue()* +        TD->getTypeAllocSize(MI->getAllocatedType()) < 2048) { +      GVI = OptimizeGlobalAddressOfMalloc(GV, MI, Context);        return true;      }    } @@ -1485,7 +1833,8 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,        if (const ArrayType *AT = dyn_cast<ArrayType>(MI->getAllocatedType())) {          MallocInst *NewMI =             new MallocInst(AllocSTy,  -                         ConstantInt::get(Type::Int32Ty, AT->getNumElements()), +                  ConstantInt::get(Type::getInt32Ty(Context), +                  AT->getNumElements()),                           "", MI);          NewMI->takeName(MI);          Value *Cast = new BitCastInst(NewMI, MI->getType(), "tmp", MI); @@ -1494,7 +1843,100 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,          MI = NewMI;        } -      GVI = PerformHeapAllocSRoA(GV, MI); +      GVI = PerformHeapAllocSRoA(GV, MI, Context); +      return true; +    } +  } +   +  return false; +}   + +/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a +/// pointer global variable with a single value stored it that is a malloc or +/// cast of malloc. +static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, +                                               CallInst *CI, +                                               BitCastInst *BCI, +                                               Module::global_iterator &GVI, +                                               TargetData *TD, +                                               LLVMContext &Context) { +  // If we can't figure out the type being malloced, then we can't optimize. 
+  const Type *AllocTy = getMallocAllocatedType(CI); +  assert(AllocTy); + +  // If this is a malloc of an abstract type, don't touch it. +  if (!AllocTy->isSized()) +    return false; + +  // We can't optimize this global unless all uses of it are *known* to be +  // of the malloc value, not of the null initializer value (consider a use +  // that compares the global's value against zero to see if the malloc has +  // been reached).  To do this, we check to see if all uses of the global +  // would trap if the global were null: this proves that they must all +  // happen after the malloc. +  if (!AllUsesOfLoadedValueWillTrapIfNull(GV)) +    return false; + +  // We can't optimize this if the malloc itself is used in a complex way, +  // for example, being stored into multiple globals.  This allows the +  // malloc to be stored into the specified global, loaded setcc'd, and +  // GEP'd.  These are all things we could transform to using the global +  // for. +  { +    SmallPtrSet<PHINode*, 8> PHIs; +    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs)) +      return false; +  }   + +  // If we have a global that is only initialized with a fixed size malloc, +  // transform the program to use global memory instead of malloc'd memory. +  // This eliminates dynamic allocation, avoids an indirection accessing the +  // data, and exposes the resultant global to further GlobalOpt. +  if (ConstantInt *NElements = +              dyn_cast<ConstantInt>(getMallocArraySize(CI, Context, TD))) { +    // Restrict this transformation to only working on small allocations +    // (2048 bytes currently), as we don't want to introduce a 16M global or +    // something. 
+    if (TD &&  +        NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) { +      GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, Context, TD); +      return true; +    } +  } +   +  // If the allocation is an array of structures, consider transforming this +  // into multiple malloc'd arrays, one for each field.  This is basically +  // SRoA for malloc'd memory. + +  // If this is an allocation of a fixed size array of structs, analyze as a +  // variable size array.  malloc [100 x struct],1 -> malloc struct, 100 +  if (!isArrayMalloc(CI, Context, TD)) +    if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy)) +      AllocTy = AT->getElementType(); +   +  if (const StructType *AllocSTy = dyn_cast<StructType>(AllocTy)) { +    // This the structure has an unreasonable number of fields, leave it +    // alone. +    if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 && +        AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) { + +      // If this is a fixed size array, transform the Malloc to be an alloc of +      // structs.  
malloc [100 x struct],1 -> malloc struct, 100 +      if (const ArrayType *AT = dyn_cast<ArrayType>(getMallocAllocatedType(CI))) { +        Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context), +                                              AT->getNumElements()); +        Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context), +                                              AllocSTy, NumElements, +                                              BCI->getName()); +        Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI); +        BCI->replaceAllUsesWith(Cast); +        BCI->eraseFromParent(); +        CI->eraseFromParent(); +        BCI = cast<BitCastInst>(NewMI); +        CI = extractMallocCallFromBitCast(NewMI); +      } +       +      GVI = PerformHeapAllocSRoA(GV, CI, BCI, Context, TD);        return true;      }    } @@ -1506,7 +1948,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,  // that only one value (besides its initializer) is ever stored to the global.  static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,                                       Module::global_iterator &GVI, -                                     TargetData &TD) { +                                     TargetData *TD, LLVMContext &Context) {    // Ignore no-op GEPs and bitcasts.    StoredOnceVal = StoredOnceVal->stripPointerCasts(); @@ -1518,14 +1960,25 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,        GV->getInitializer()->isNullValue()) {      if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {        if (GV->getInitializer()->getType() != SOVC->getType()) -        SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType()); +        SOVC =  +         ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());        // Optimize away any trapping uses of the loaded value. 
-      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC)) +      if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))          return true;      } else if (MallocInst *MI = dyn_cast<MallocInst>(StoredOnceVal)) { -      if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD)) +      if (TryToOptimizeStoreOfMallocToGlobal(GV, MI, GVI, TD, Context))          return true; +    } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) { +      if (getMallocAllocatedType(CI)) { +        BitCastInst* BCI = NULL; +        for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); +             UI != E; ) +          BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++)); +        if (BCI && +            TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context)) +          return true; +      }      }    } @@ -1536,7 +1989,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,  /// two values ever stored into GV are its initializer and OtherVal.  See if we  /// can shrink the global into a boolean and select between the two values  /// whenever it is used.  This exposes the values to other scalar optimizations. -static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { +static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal, +                                       LLVMContext &Context) {    const Type *GVElType = GV->getType()->getElementType();    // If GVElType is already i1, it is already shrunk.  If the type of the GV is @@ -1544,7 +1998,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {    // between them is very expensive and unlikely to lead to later    // simplification.  In these cases, we typically end up with "cond ? v1 : v2"    // where v1 and v2 both require constant pool loads, a big loss. 
-  if (GVElType == Type::Int1Ty || GVElType->isFloatingPoint() || +  if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() ||        isa<PointerType>(GVElType) || isa<VectorType>(GVElType))      return false; @@ -1554,18 +2008,19 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {      if (!isa<LoadInst>(I) && !isa<StoreInst>(I))        return false; -  DOUT << "   *** SHRINKING TO BOOL: " << *GV; +  DEBUG(errs() << "   *** SHRINKING TO BOOL: " << *GV);    // Create the new global, initializing it to false. -  GlobalVariable *NewGV = new GlobalVariable(Type::Int1Ty, false, -         GlobalValue::InternalLinkage, ConstantInt::getFalse(), +  GlobalVariable *NewGV = new GlobalVariable(Context, +                                             Type::getInt1Ty(Context), false, +         GlobalValue::InternalLinkage, ConstantInt::getFalse(Context),                                               GV->getName()+".b", -                                             (Module *)NULL,                                               GV->isThreadLocal());    GV->getParent()->getGlobalList().insert(GV, NewGV);    Constant *InitVal = GV->getInitializer(); -  assert(InitVal->getType() != Type::Int1Ty && "No reason to shrink to bool!"); +  assert(InitVal->getType() != Type::getInt1Ty(Context) && +         "No reason to shrink to bool!");    // If initialized to zero and storing one into the global, we can use a cast    // instead of a select to synthesize the desired value. @@ -1581,7 +2036,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {        // Only do this if we weren't storing a loaded value.        Value *StoreVal;        if (StoringOther || SI->getOperand(0) == InitVal) -        StoreVal = ConstantInt::get(Type::Int1Ty, StoringOther); +        StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther);        else {          // Otherwise, we are storing a previously loaded copy.  
To do this,          // change the copy from copying the original value to just copying the @@ -1632,7 +2087,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,    GV->removeDeadConstantUsers();    if (GV->use_empty()) { -    DOUT << "GLOBAL DEAD: " << *GV; +    DEBUG(errs() << "GLOBAL DEAD: " << *GV);      GV->eraseFromParent();      ++NumDeleted;      return true; @@ -1675,7 +2130,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,          GS.AccessingFunction->getName() == "main" &&          GS.AccessingFunction->hasExternalLinkage() &&          GV->getType()->getAddressSpace() == 0) { -      DOUT << "LOCALIZING GLOBAL: " << *GV; +      DEBUG(errs() << "LOCALIZING GLOBAL: " << *GV);        Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();        const Type* ElemTy = GV->getType()->getElementType();        // FIXME: Pass Global's alignment when globals have alignment @@ -1692,11 +2147,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,      // If the global is never loaded (but may be stored to), it is dead.      // Delete it now.      if (!GS.isLoaded) { -      DOUT << "GLOBAL NEVER LOADED: " << *GV; +      DEBUG(errs() << "GLOBAL NEVER LOADED: " << *GV);        // Delete any stores we can find to the global.  We may not be able to        // make it completely dead though. -      bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer()); +      bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(),  +                                                GV->getContext());        // If the global is dead now, delete it.        
if (GV->use_empty()) { @@ -1707,16 +2163,16 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,        return Changed;      } else if (GS.StoredType <= GlobalStatus::isInitializerStored) { -      DOUT << "MARKING CONSTANT: " << *GV; +      DEBUG(errs() << "MARKING CONSTANT: " << *GV);        GV->setConstant(true);        // Clean up any obviously simplifiable users now. -      CleanupConstantGlobalUsers(GV, GV->getInitializer()); +      CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext());        // If the global is dead now, just nuke it.        if (GV->use_empty()) { -        DOUT << "   *** Marking constant allowed us to simplify " -             << "all users and delete global!\n"; +        DEBUG(errs() << "   *** Marking constant allowed us to simplify " +                     << "all users and delete global!\n");          GV->eraseFromParent();          ++NumDeleted;        } @@ -1724,11 +2180,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,        ++NumMarked;        return true;      } else if (!GV->getInitializer()->getType()->isSingleValueType()) { -      if (GlobalVariable *FirstNewGV = SRAGlobal(GV,  -                                                 getAnalysis<TargetData>())) { -        GVI = FirstNewGV;  // Don't skip the newly produced globals! -        return true; -      } +      if (TargetData *TD = getAnalysisIfAvailable<TargetData>()) +        if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD, +                                                   GV->getContext())) { +          GVI = FirstNewGV;  // Don't skip the newly produced globals! 
+          return true; +        }      } else if (GS.StoredType == GlobalStatus::isStoredOnce) {        // If the initial value for the global was an undef value, and if only        // one other value was stored into it, we can just change the @@ -1740,11 +2197,12 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,            GV->setInitializer(SOVConstant);            // Clean up any obviously simplifiable users now. -          CleanupConstantGlobalUsers(GV, GV->getInitializer()); +          CleanupConstantGlobalUsers(GV, GV->getInitializer(),  +                                     GV->getContext());            if (GV->use_empty()) { -            DOUT << "   *** Substituting initializer allowed us to " -                 << "simplify all users and delete global!\n"; +            DEBUG(errs() << "   *** Substituting initializer allowed us to " +                         << "simplify all users and delete global!\n");              GV->eraseFromParent();              ++NumDeleted;            } else { @@ -1757,13 +2215,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,        // Try to optimize globals based on the knowledge that only one value        // (besides its initializer) is ever stored to the global.        if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI, -                                   getAnalysis<TargetData>())) +                                   getAnalysisIfAvailable<TargetData>(), +                                   GV->getContext()))          return true;        // Otherwise, if the global was not a boolean, we can shrink it to be a        // boolean.        
if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) -        if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) { +        if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) {            ++NumShrunkToBool;            return true;          } @@ -1866,16 +2325,16 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {        if (!ATy) return 0;        const StructType *STy = dyn_cast<StructType>(ATy->getElementType());        if (!STy || STy->getNumElements() != 2 || -          STy->getElementType(0) != Type::Int32Ty) return 0; +          STy->getElementType(0) != Type::getInt32Ty(M.getContext())) return 0;        const PointerType *PFTy = dyn_cast<PointerType>(STy->getElementType(1));        if (!PFTy) return 0;        const FunctionType *FTy = dyn_cast<FunctionType>(PFTy->getElementType()); -      if (!FTy || FTy->getReturnType() != Type::VoidTy || FTy->isVarArg() || -          FTy->getNumParams() != 0) +      if (!FTy || FTy->getReturnType() != Type::getVoidTy(M.getContext()) || +          FTy->isVarArg() || FTy->getNumParams() != 0)          return 0;        // Verify that the initializer is simple enough for us to handle. -      if (!I->hasInitializer()) return 0; +      if (!I->hasDefinitiveInitializer()) return 0;        ConstantArray *CA = dyn_cast<ConstantArray>(I->getInitializer());        if (!CA) return 0;        for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) @@ -1916,10 +2375,11 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {  /// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the  /// specified array, returning the new global to use.  
static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,  -                                          const std::vector<Function*> &Ctors) { +                                          const std::vector<Function*> &Ctors, +                                          LLVMContext &Context) {    // If we made a change, reassemble the initializer list.    std::vector<Constant*> CSVals; -  CSVals.push_back(ConstantInt::get(Type::Int32Ty, 65535)); +  CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535));    CSVals.push_back(0);    // Create the new init list. @@ -1928,19 +2388,19 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,      if (Ctors[i]) {        CSVals[1] = Ctors[i];      } else { -      const Type *FTy = FunctionType::get(Type::VoidTy, false); +      const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false);        const PointerType *PFTy = PointerType::getUnqual(FTy);        CSVals[1] = Constant::getNullValue(PFTy); -      CSVals[0] = ConstantInt::get(Type::Int32Ty, 2147483647); +      CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647);      } -    CAList.push_back(ConstantStruct::get(CSVals)); +    CAList.push_back(ConstantStruct::get(Context, CSVals, false));    }    // Create the array initializer.    const Type *StructTy = -    cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); -  Constant *CA = ConstantArray::get(ArrayType::get(StructTy, CAList.size()), -                                    CAList); +      cast<ArrayType>(GCL->getType()->getElementType())->getElementType(); +  Constant *CA = ConstantArray::get(ArrayType::get(StructTy,  +                                                   CAList.size()), CAList);    // If we didn't change the number of elements, don't create a new GV.    
if (CA->getType() == GCL->getInitializer()->getType()) { @@ -1949,9 +2409,9 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,    }    // Create the new global and insert it next to the existing list. -  GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), +  GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(),  +                                           GCL->isConstant(),                                             GCL->getLinkage(), CA, "", -                                           (Module *)NULL,                                             GCL->isThreadLocal());    GCL->getParent()->getGlobalList().insert(GCL, NGV);    NGV->takeName(GCL); @@ -1984,21 +2444,38 @@ static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,  /// enough for us to understand.  In particular, if it is a cast of something,  /// we punt.  We basically just support direct accesses to globals and GEP's of  /// globals.  This should be kept up to date with CommitValueTo. -static bool isSimpleEnoughPointerToCommit(Constant *C) { -  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { -    if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage()) -      return false;  // do not allow weak/linkonce/dllimport/dllexport linkage. -    return !GV->isDeclaration();  // reject external globals. -  } +static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) { +  // Conservatively, avoid aggregate types. This is because we don't +  // want to worry about them partially overlapping other stores. +  if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType()) +    return false; + +  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) +    // Do not allow weak/linkonce/dllimport/dllexport linkage or +    // external globals. +    return GV->hasDefinitiveInitializer(); +    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))      // Handle a constantexpr gep.      
if (CE->getOpcode() == Instruction::GetElementPtr && -        isa<GlobalVariable>(CE->getOperand(0))) { +        isa<GlobalVariable>(CE->getOperand(0)) && +        cast<GEPOperator>(CE)->isInBounds()) {        GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); -      if (!GV->hasExternalLinkage() && !GV->hasLocalLinkage()) -        return false;  // do not allow weak/linkonce/dllimport/dllexport linkage. -      return GV->hasInitializer() && -             ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); +      // Do not allow weak/linkonce/dllimport/dllexport linkage or +      // external globals. +      if (!GV->hasDefinitiveInitializer()) +        return false; + +      // The first index must be zero. +      ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin())); +      if (!CI || !CI->isZero()) return false; + +      // The remaining indices must be compile-time known integers within the +      // notional bounds of the corresponding static array types. +      if (!CE->isGEPWithNoNotionalOverIndexing()) +        return false; + +      return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);      }    return false;  } @@ -2007,7 +2484,8 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {  /// initializer.  This returns 'Init' modified to reflect 'Val' stored into it.  /// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.  static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, -                                   ConstantExpr *Addr, unsigned OpNo) { +                                   ConstantExpr *Addr, unsigned OpNo, +                                   LLVMContext &Context) {    // Base case of the recursion.    
if (OpNo == Addr->getNumOperands()) {      assert(Val->getType() == Init->getType() && "Type mismatch!"); @@ -2028,7 +2506,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)          Elts.push_back(UndefValue::get(STy->getElementType(i)));      } else { -      assert(0 && "This code is out of sync with " +      llvm_unreachable("This code is out of sync with "               " ConstantFoldLoadThroughGEPConstantExpr");      } @@ -2036,10 +2514,10 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,      ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));      unsigned Idx = CU->getZExtValue();      assert(Idx < STy->getNumElements() && "Struct index out of range!"); -    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1); +    Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context);      // Return the modified struct. -    return ConstantStruct::get(&Elts[0], Elts.size(), STy->isPacked()); +    return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked());    } else {      ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));      const ArrayType *ATy = cast<ArrayType>(Init->getType()); @@ -2056,20 +2534,21 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,        Constant *Elt = UndefValue::get(ATy->getElementType());        Elts.assign(ATy->getNumElements(), Elt);      } else { -      assert(0 && "This code is out of sync with " +      llvm_unreachable("This code is out of sync with "               " ConstantFoldLoadThroughGEPConstantExpr");      }      assert(CI->getZExtValue() < ATy->getNumElements());      Elts[CI->getZExtValue()] = -      EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1); +      EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context);      return ConstantArray::get(ATy, Elts);    }      }  /// CommitValueTo - We have decided that Addr (which 
satisfies the predicate  /// isSimpleEnoughPointerToCommit) should get Val as its value.  Make it happen. -static void CommitValueTo(Constant *Val, Constant *Addr) { +static void CommitValueTo(Constant *Val, Constant *Addr, +                          LLVMContext &Context) {    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {      assert(GV->hasInitializer());      GV->setInitializer(Val); @@ -2080,7 +2559,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {    GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));    Constant *Init = GV->getInitializer(); -  Init = EvaluateStoreInto(Init, Val, CE, 2); +  Init = EvaluateStoreInto(Init, Val, CE, 2, Context);    GV->setInitializer(Init);  } @@ -2088,7 +2567,8 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {  /// P after the stores reflected by 'memory' have been performed.  If we can't  /// decide, return null.  static Constant *ComputeLoadResult(Constant *P, -                                const DenseMap<Constant*, Constant*> &Memory) { +                                const DenseMap<Constant*, Constant*> &Memory, +                                LLVMContext &Context) {    // If this memory location has been recently stored, use the stored value: it    // is the most up-to-date.    DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P); @@ -2096,7 +2576,7 @@ static Constant *ComputeLoadResult(Constant *P,    // Access it.    
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) { -    if (GV->hasInitializer()) +    if (GV->hasDefinitiveInitializer())        return GV->getInitializer();      return 0;    } @@ -2106,7 +2586,7 @@ static Constant *ComputeLoadResult(Constant *P,      if (CE->getOpcode() == Instruction::GetElementPtr &&          isa<GlobalVariable>(CE->getOperand(0))) {        GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); -      if (GV->hasInitializer()) +      if (GV->hasDefinitiveInitializer())          return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);      } @@ -2117,7 +2597,7 @@ static Constant *ComputeLoadResult(Constant *P,  /// successful, false if we can't evaluate it.  ActualArgs contains the formal  /// arguments for the function.  static bool EvaluateFunction(Function *F, Constant *&RetVal, -                             const std::vector<Constant*> &ActualArgs, +                             const SmallVectorImpl<Constant*> &ActualArgs,                               std::vector<Function*> &CallStack,                               DenseMap<Constant*, Constant*> &MutatedMemory,                               std::vector<GlobalVariable*> &AllocaTmps) { @@ -2126,6 +2606,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,    if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())      return false; +  LLVMContext &Context = F->getContext(); +      CallStack.push_back(F);    /// Values - As we compute SSA register values, we store their contents here. @@ -2152,7 +2634,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,      if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {        if (SI->isVolatile()) return false;  // no volatile accesses.        Constant *Ptr = getVal(Values, SI->getOperand(1)); -      if (!isSimpleEnoughPointerToCommit(Ptr)) +      if (!isSimpleEnoughPointerToCommit(Ptr, Context))          // If this is too complex for us to commit, reject it.          
return false;        Constant *Val = getVal(Values, SI->getOperand(0)); @@ -2170,7 +2652,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,                                           getVal(Values, CI->getOperand(0)),                                           CI->getType());      } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { -      InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), +      InstResult = +            ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),                                             getVal(Values, SI->getOperand(1)),                                             getVal(Values, SI->getOperand(2)));      } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { @@ -2179,16 +2662,18 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,        for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();             i != e; ++i)          GEPOps.push_back(getVal(Values, *i)); -      InstResult = ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size()); +      InstResult = cast<GEPOperator>(GEP)->isInBounds() ? +          ConstantExpr::getInBoundsGetElementPtr(P, &GEPOps[0], GEPOps.size()) : +          ConstantExpr::getGetElementPtr(P, &GEPOps[0], GEPOps.size());      } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {        if (LI->isVolatile()) return false;  // no volatile accesses.        InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)), -                                     MutatedMemory); +                                     MutatedMemory, Context);        if (InstResult == 0) return false; // Could not evaluate load.      } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {        if (AI->isArrayAllocation()) return false;  // Cannot handle array allocs.        
const Type *Ty = AI->getType()->getElementType(); -      AllocaTmps.push_back(new GlobalVariable(Ty, false, +      AllocaTmps.push_back(new GlobalVariable(Context, Ty, false,                                                GlobalValue::InternalLinkage,                                                UndefValue::get(Ty),                                                AI->getName())); @@ -2208,14 +2693,14 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,        Function *Callee = dyn_cast<Function>(getVal(Values, CI->getOperand(0)));        if (!Callee) return false;  // Cannot resolve. -      std::vector<Constant*> Formals; +      SmallVector<Constant*, 8> Formals;        for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end();             i != e; ++i)          Formals.push_back(getVal(Values, *i)); -       +        if (Callee->isDeclaration()) {          // If this is a function we can constant fold, do it. -        if (Constant *C = ConstantFoldCall(Callee, &Formals[0], +        if (Constant *C = ConstantFoldCall(Callee, Formals.data(),                                             Formals.size())) {            InstResult = C;          } else { @@ -2310,16 +2795,17 @@ static bool EvaluateStaticConstructor(Function *F) {    // Call the function.    Constant *RetValDummy; -  bool EvalSuccess = EvaluateFunction(F, RetValDummy, std::vector<Constant*>(), -                                       CallStack, MutatedMemory, AllocaTmps); +  bool EvalSuccess = EvaluateFunction(F, RetValDummy, +                                      SmallVector<Constant*, 0>(), CallStack, +                                      MutatedMemory, AllocaTmps);    if (EvalSuccess) {      // We succeeded at evaluation: commit the result. 
-    DOUT << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" -         << F->getName() << "' to " << MutatedMemory.size() -         << " stores.\n"; +    DEBUG(errs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" +          << F->getName() << "' to " << MutatedMemory.size() +          << " stores.\n");      for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),           E = MutatedMemory.end(); I != E; ++I) -      CommitValueTo(I->second, I->first); +      CommitValueTo(I->second, I->first, F->getContext());    }    // At this point, we are done interpreting.  If we created any 'alloca' @@ -2376,7 +2862,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {    if (!MadeChange) return false; -  GCL = InstallGlobalCtors(GCL, Ctors); +  GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext());    return true;  } diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index e4a9deadd971..7b0e9c727cd4 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -19,6 +19,7 @@  #include "llvm/Transforms/IPO.h"  #include "llvm/Constants.h"  #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Analysis/ValueTracking.h" @@ -129,7 +130,8 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {    Function::arg_iterator AI = F.arg_begin();    for (unsigned i = 0, e = ArgumentConstants.size(); i != e; ++i, ++AI) {      // Do we have a constant argument? -    if (ArgumentConstants[i].second || AI->use_empty()) +    if (ArgumentConstants[i].second || AI->use_empty() || +        (AI->hasByValAttr() && !F.onlyReadsMemory()))        continue;      Value *V = ArgumentConstants[i].first; @@ -151,13 +153,15 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {  // callers will be updated to use the value they pass in directly instead of  // using the return value.  
bool IPCP::PropagateConstantReturn(Function &F) { -  if (F.getReturnType() == Type::VoidTy) +  if (F.getReturnType() == Type::getVoidTy(F.getContext()))      return false; // No return value.    // If this function could be overridden later in the link stage, we can't    // propagate information about its results into callers.    if (F.mayBeOverridden())      return false; +     +  LLVMContext &Context = F.getContext();    // Check to see if this function returns a constant.    SmallVector<Value *,4> RetVals; @@ -182,7 +186,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {          if (!STy)            V = RI->getOperand(i);          else -          V = FindInsertedValue(RI->getOperand(0), i); +          V = FindInsertedValue(RI->getOperand(0), i, Context);          if (V) {            // Ignore undefs, we can change them into anything diff --git a/lib/Transforms/IPO/IndMemRemoval.cpp b/lib/Transforms/IPO/IndMemRemoval.cpp index b55dea2c759c..e7884ec634b6 100644 --- a/lib/Transforms/IPO/IndMemRemoval.cpp +++ b/lib/Transforms/IPO/IndMemRemoval.cpp @@ -1,4 +1,4 @@ -//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ----------===// +//===-- IndMemRemoval.cpp - Remove indirect allocations and frees ---------===//  //  //                     The LLVM Compiler Infrastructure  // @@ -10,8 +10,8 @@  // This pass finds places where memory allocation functions may escape into  // indirect land.  Some transforms are much easier (aka possible) only if free   // or malloc are not called indirectly. -// Thus find places where the address of memory functions are taken and construct -// bounce functions with direct calls of those functions. +// Thus find places where the address of memory functions are taken and  +// construct bounce functions with direct calls of those functions.  
//  //===----------------------------------------------------------------------===// @@ -55,8 +55,8 @@ bool IndMemRemPass::runOnModule(Module &M) {        Function* FN = Function::Create(F->getFunctionType(),                                        GlobalValue::LinkOnceAnyLinkage,                                        "free_llvm_bounce", &M); -      BasicBlock* bb = BasicBlock::Create("entry",FN); -      Instruction* R = ReturnInst::Create(bb); +      BasicBlock* bb = BasicBlock::Create(M.getContext(), "entry",FN); +      Instruction* R = ReturnInst::Create(M.getContext(), bb);        new FreeInst(FN->arg_begin(), R);        ++NumBounce;        NumBounceSites += F->getNumUses(); @@ -70,11 +70,12 @@ bool IndMemRemPass::runOnModule(Module &M) {                                        GlobalValue::LinkOnceAnyLinkage,                                        "malloc_llvm_bounce", &M);        FN->setDoesNotAlias(0); -      BasicBlock* bb = BasicBlock::Create("entry",FN); +      BasicBlock* bb = BasicBlock::Create(M.getContext(), "entry",FN);        Instruction* c = CastInst::CreateIntegerCast( -          FN->arg_begin(), Type::Int32Ty, false, "c", bb); -      Instruction* a = new MallocInst(Type::Int8Ty, c, "m", bb); -      ReturnInst::Create(a, bb); +          FN->arg_begin(), Type::getInt32Ty(M.getContext()), false, "c", bb); +      Instruction* a = new MallocInst(Type::getInt8Ty(M.getContext()), +                                      c, "m", bb); +      ReturnInst::Create(M.getContext(), a, bb);        ++NumBounce;        NumBounceSites += F->getNumUses();        F->replaceAllUsesWith(FN); diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 5f9ea5453c1f..2344403391cf 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -19,11 +19,11 @@  #include "llvm/Module.h"  #include "llvm/Type.h"  #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h"  #include 
"llvm/Support/CallSite.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Transforms/IPO.h"  #include "llvm/Transforms/IPO/InlinerPass.h" -#include "llvm/Transforms/Utils/InlineCost.h"  #include "llvm/ADT/SmallPtrSet.h"  using namespace llvm; diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index e107a0023ce6..b1c643b558c5 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -18,11 +18,11 @@  #include "llvm/Module.h"  #include "llvm/Type.h"  #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h"  #include "llvm/Support/CallSite.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Transforms/IPO.h"  #include "llvm/Transforms/IPO/InlinerPass.h" -#include "llvm/Transforms/Utils/InlineCost.h"  #include "llvm/ADT/SmallPtrSet.h"  using namespace llvm; @@ -78,7 +78,7 @@ bool SimpleInliner::doInitialization(CallGraph &CG) {      return false;    // Don't crash on invalid code -  if (!GV->hasInitializer()) +  if (!GV->hasDefinitiveInitializer())      return false;    const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index b382837289bd..ea47366f47ed 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -18,21 +18,25 @@  #include "llvm/Instructions.h"  #include "llvm/IntrinsicInst.h"  #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/InlineCost.h"  #include "llvm/Support/CallSite.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Transforms/IPO/InlinerPass.h"  #include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/Statistic.h"  #include <set>  using namespace llvm;  STATISTIC(NumInlined, "Number of functions inlined");  STATISTIC(NumDeleted, "Number of functions deleted 
because all callers found"); +STATISTIC(NumMergedAllocas, "Number of allocas merged together");  static cl::opt<int> -InlineLimit("inline-threshold", cl::Hidden, cl::init(200), +InlineLimit("inline-threshold", cl::Hidden, cl::init(200), cl::ZeroOrMore,          cl::desc("Control the amount of inlining to perform (default = 200)"));  Inliner::Inliner(void *ID)  @@ -45,19 +49,32 @@ Inliner::Inliner(void *ID, int Threshold)  /// the call graph.  If the derived class implements this method, it should  /// always explicitly call the implementation here.  void Inliner::getAnalysisUsage(AnalysisUsage &Info) const { -  Info.addRequired<TargetData>();    CallGraphSCCPass::getAnalysisUsage(Info);  } -// InlineCallIfPossible - If it is possible to inline the specified call site, -// do so and update the CallGraph for this operation. -bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG, -                                 const SmallPtrSet<Function*, 8> &SCCFunctions, -                                 const TargetData &TD) { + +typedef DenseMap<const ArrayType*, std::vector<AllocaInst*> > +InlinedArrayAllocasTy; + +/// InlineCallIfPossible - If it is possible to inline the specified call site, +/// do so and update the CallGraph for this operation. +/// +/// This function also does some basic book-keeping to update the IR.  The +/// InlinedArrayAllocas map keeps track of any allocas that are already +/// available from other  functions inlined into the caller.  If we are able to +/// inline this call site we attempt to reuse already available allocas or add +/// any new allocas to the set if not possible. +static bool InlineCallIfPossible(CallSite CS, CallGraph &CG, +                                 const TargetData *TD, +                                 InlinedArrayAllocasTy &InlinedArrayAllocas) {    Function *Callee = CS.getCalledFunction();    Function *Caller = CS.getCaller(); -  if (!InlineFunction(CS, &CG, &TD)) return false; +  // Try to inline the function.  
Get the list of static allocas that were +  // inlined. +  SmallVector<AllocaInst*, 16> StaticAllocas; +  if (!InlineFunction(CS, &CG, TD, &StaticAllocas)) +    return false;    // If the inlined function had a higher stack protection level than the    // calling function, then bump up the caller's stack protection level. @@ -67,23 +84,89 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,             !Caller->hasFnAttr(Attribute::StackProtectReq))      Caller->addFnAttr(Attribute::StackProtect); -  // If we inlined the last possible call site to the function, delete the -  // function body now. -  if (Callee->use_empty() && (Callee->hasLocalLinkage() || -                              Callee->hasAvailableExternallyLinkage()) && -      !SCCFunctions.count(Callee)) { -    DOUT << "    -> Deleting dead function: " << Callee->getName() << "\n"; -    CallGraphNode *CalleeNode = CG[Callee]; - -    // Remove any call graph edges from the callee to its callees. -    CalleeNode->removeAllCalledFunctions(); - -    resetCachedCostInfo(CalleeNode->getFunction()); +   +  // Look at all of the allocas that we inlined through this call site.  If we +  // have already inlined other allocas through other calls into this function, +  // then we know that they have disjoint lifetimes and that we can merge them. +  // +  // There are many heuristics possible for merging these allocas, and the +  // different options have different tradeoffs.  One thing that we *really* +  // don't want to hurt is SRoA: once inlining happens, often allocas are no +  // longer address taken and so they can be promoted. +  // +  // Our "solution" for that is to only merge allocas whose outermost type is an +  // array type.  These are usually not promoted because someone is using a +  // variable index into them.  These are also often the most important ones to +  // merge. 
+  // +  // A better solution would be to have real memory lifetime markers in the IR +  // and not have the inliner do any merging of allocas at all.  This would +  // allow the backend to do proper stack slot coloring of all allocas that +  // *actually make it to the backend*, which is really what we want. +  // +  // Because we don't have this information, we do this simple and useful hack. +  // +  SmallPtrSet<AllocaInst*, 16> UsedAllocas; +   +  // Loop over all the allocas we have so far and see if they can be merged with +  // a previously inlined alloca.  If not, remember that we had it. +  for (unsigned AllocaNo = 0, e = StaticAllocas.size(); +       AllocaNo != e; ++AllocaNo) { +    AllocaInst *AI = StaticAllocas[AllocaNo]; +     +    // Don't bother trying to merge array allocations (they will usually be +    // canonicalized to be an allocation *of* an array), or allocations whose +    // type is not itself an array (because we're afraid of pessimizing SRoA). +    const ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType()); +    if (ATy == 0 || AI->isArrayAllocation()) +      continue; +     +    // Get the list of all available allocas for this array type. +    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy]; +     +    // Loop over the allocas in AllocasForType to see if we can reuse one.  Note +    // that we have to be careful not to reuse the same "available" alloca for +    // multiple different allocas that we just inlined, we use the 'UsedAllocas' +    // set to keep track of which "available" allocas are being used by this +    // function.  Also, AllocasForType can be empty of course! +    bool MergedAwayAlloca = false; +    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) { +      AllocaInst *AvailableAlloca = AllocasForType[i]; +       +      // The available alloca has to be in the right function, not in some other +      // function in this SCC. 
+      if (AvailableAlloca->getParent() != AI->getParent()) +        continue; +       +      // If the inlined function already uses this alloca then we can't reuse +      // it. +      if (!UsedAllocas.insert(AvailableAlloca)) +        continue; +       +      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare +      // success! +      DEBUG(errs() << "    ***MERGED ALLOCA: " << *AI); +       +      AI->replaceAllUsesWith(AvailableAlloca); +      AI->eraseFromParent(); +      MergedAwayAlloca = true; +      ++NumMergedAllocas; +      break; +    } -    // Removing the node for callee from the call graph and delete it. -    delete CG.removeFunctionFromModule(CalleeNode); -    ++NumDeleted; +    // If we already nuked the alloca, we're done with it. +    if (MergedAwayAlloca) +      continue; + +    // If we were unable to merge away the alloca either because there are no +    // allocas of the right type available or because we reused them all +    // already, remember that this alloca came from an inlined function and mark +    // it used so we don't reuse it for other allocas from this inline +    // operation. +    AllocasForType.push_back(AI); +    UsedAllocas.insert(AI);    } +      return true;  } @@ -91,69 +174,145 @@ bool Inliner::InlineCallIfPossible(CallSite CS, CallGraph &CG,  /// at the given CallSite.  
bool Inliner::shouldInline(CallSite CS) {    InlineCost IC = getInlineCost(CS); -  float FudgeFactor = getInlineFudgeFactor(CS);    if (IC.isAlways()) { -    DOUT << "    Inlining: cost=always" -         << ", Call: " << *CS.getInstruction(); +    DEBUG(errs() << "    Inlining: cost=always" +          << ", Call: " << *CS.getInstruction() << "\n");      return true;    }    if (IC.isNever()) { -    DOUT << "    NOT Inlining: cost=never" -         << ", Call: " << *CS.getInstruction(); +    DEBUG(errs() << "    NOT Inlining: cost=never" +          << ", Call: " << *CS.getInstruction() << "\n");      return false;    }    int Cost = IC.getValue();    int CurrentThreshold = InlineThreshold; -  Function *Fn = CS.getCaller(); -  if (Fn && !Fn->isDeclaration()  -      && Fn->hasFnAttr(Attribute::OptimizeForSize) -      && InlineThreshold != 50) { +  Function *Caller = CS.getCaller(); +  if (Caller && !Caller->isDeclaration() && +      Caller->hasFnAttr(Attribute::OptimizeForSize) && +      InlineLimit.getNumOccurrences() == 0 && +      InlineThreshold != 50)      CurrentThreshold = 50; -  } +  float FudgeFactor = getInlineFudgeFactor(CS);    if (Cost >= (int)(CurrentThreshold * FudgeFactor)) { -    DOUT << "    NOT Inlining: cost=" << Cost -         << ", Call: " << *CS.getInstruction(); +    DEBUG(errs() << "    NOT Inlining: cost=" << Cost +          << ", Call: " << *CS.getInstruction() << "\n");      return false; -  } else { -    DOUT << "    Inlining: cost=" << Cost -         << ", Call: " << *CS.getInstruction(); -    return true;    } +   +  // Try to detect the case where the current inlining candidate caller +  // (call it B) is a static function and is an inlining candidate elsewhere, +  // and the current candidate callee (call it C) is large enough that +  // inlining it into B would make B too big to inline later.  In these +  // circumstances it may be best not to inline C into B, but to inline B +  // into its callers. 
+  if (Caller->hasLocalLinkage()) { +    int TotalSecondaryCost = 0; +    bool outerCallsFound = false; +    bool allOuterCallsWillBeInlined = true; +    bool someOuterCallWouldNotBeInlined = false; +    for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();  +         I != E; ++I) { +      CallSite CS2 = CallSite::get(*I); + +      // If this isn't a call to Caller (it could be some other sort +      // of reference) skip it. +      if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller) +        continue; + +      InlineCost IC2 = getInlineCost(CS2); +      if (IC2.isNever()) +        allOuterCallsWillBeInlined = false; +      if (IC2.isAlways() || IC2.isNever()) +        continue; + +      outerCallsFound = true; +      int Cost2 = IC2.getValue(); +      int CurrentThreshold2 = InlineThreshold; +      Function *Caller2 = CS2.getCaller(); +      if (Caller2 && !Caller2->isDeclaration() && +          Caller2->hasFnAttr(Attribute::OptimizeForSize) && +          InlineThreshold != 50) +        CurrentThreshold2 = 50; + +      float FudgeFactor2 = getInlineFudgeFactor(CS2); + +      if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2)) +        allOuterCallsWillBeInlined = false; + +      // See if we have this case.  We subtract off the penalty +      // for the call instruction, which we would be deleting. +      if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) && +          Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=  +                (int)(CurrentThreshold2 * FudgeFactor2)) { +        someOuterCallWouldNotBeInlined = true; +        TotalSecondaryCost += Cost2; +      } +    } +    // If all outer calls to Caller would get inlined, the cost for the last +    // one is set very low by getInlineCost, in anticipation that Caller will +    // be removed entirely.  We did not account for this above unless there +    // is only one caller of Caller. 
+    if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end()) +      TotalSecondaryCost += InlineConstants::LastCallToStaticBonus; + +    if (outerCallsFound && someOuterCallWouldNotBeInlined &&  +        TotalSecondaryCost < Cost) { +      DEBUG(errs() << "    NOT Inlining: " << *CS.getInstruction() <<  +           " Cost = " << Cost <<  +           ", outer Cost = " << TotalSecondaryCost << '\n'); +      return false; +    } +  } + +  DEBUG(errs() << "    Inlining: cost=" << Cost +        << ", Call: " << *CS.getInstruction() << '\n'); +  return true;  } -bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) { +bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {    CallGraph &CG = getAnalysis<CallGraph>(); -  TargetData &TD = getAnalysis<TargetData>(); +  const TargetData *TD = getAnalysisIfAvailable<TargetData>();    SmallPtrSet<Function*, 8> SCCFunctions; -  DOUT << "Inliner visiting SCC:"; +  DEBUG(errs() << "Inliner visiting SCC:");    for (unsigned i = 0, e = SCC.size(); i != e; ++i) {      Function *F = SCC[i]->getFunction();      if (F) SCCFunctions.insert(F); -    DOUT << " " << (F ? F->getName() : "INDIRECTNODE"); +    DEBUG(errs() << " " << (F ? F->getName() : "INDIRECTNODE"));    }    // Scan through and identify all call sites ahead of time so that we only    // inline call sites in the original functions, not call sites that result    // from inlining other functions. 
-  std::vector<CallSite> CallSites; +  SmallVector<CallSite, 16> CallSites; -  for (unsigned i = 0, e = SCC.size(); i != e; ++i) -    if (Function *F = SCC[i]->getFunction()) -      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) -        for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { -          CallSite CS = CallSite::get(I); -          if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(I) && -                                     (!CS.getCalledFunction() || -                                      !CS.getCalledFunction()->isDeclaration())) -            CallSites.push_back(CS); -        } +  for (unsigned i = 0, e = SCC.size(); i != e; ++i) { +    Function *F = SCC[i]->getFunction(); +    if (!F) continue; +     +    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) +      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { +        CallSite CS = CallSite::get(I); +        // If this isn't a call, or it is a call to an intrinsic, it can +        // never be inlined. +        if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I)) +          continue; +         +        // If this is a direct call to an external function, we can never inline +        // it.  If it is an indirect call, inlining may resolve it to be a +        // direct call, so we keep it. +        if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) +          continue; +         +        CallSites.push_back(CS); +      } +  } -  DOUT << ": " << CallSites.size() << " call sites.\n"; +  DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");    // Now that we have all of the call sites, move the ones to functions in the    // current SCC to the end of the list. 
@@ -163,6 +322,9 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {        if (SCCFunctions.count(F))          std::swap(CallSites[i--], CallSites[--FirstCallInSCC]); +   +  InlinedArrayAllocasTy InlinedArrayAllocas; +      // Now that we have all of the call sites, loop over them and inline them if    // it looks profitable to do so.    bool Changed = false; @@ -171,51 +333,68 @@ bool Inliner::runOnSCC(const std::vector<CallGraphNode*> &SCC) {      LocalChange = false;      // Iterate over the outer loop because inlining functions can cause indirect      // calls to become direct calls. -    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) -      if (Function *Callee = CallSites[CSi].getCalledFunction()) { -        // Calls to external functions are never inlinable. -        if (Callee->isDeclaration()) { -          if (SCC.size() == 1) { -            std::swap(CallSites[CSi], CallSites.back()); -            CallSites.pop_back(); -          } else { -            // Keep the 'in SCC / not in SCC' boundary correct. -            CallSites.erase(CallSites.begin()+CSi); -          } -          --CSi; -          continue; -        } - -        // If the policy determines that we should inline this function, -        // try to do so. -        CallSite CS = CallSites[CSi]; -        if (shouldInline(CS)) { -          Function *Caller = CS.getCaller(); -          // Attempt to inline the function... -          if (InlineCallIfPossible(CS, CG, SCCFunctions, TD)) { -            // Remove any cached cost info for this caller, as inlining the -            // callee has increased the size of the caller (which may be the -            // same as the callee). -            resetCachedCostInfo(Caller); - -            // Remove this call site from the list.  
If possible, use  -            // swap/pop_back for efficiency, but do not use it if doing so would -            // move a call site to a function in this SCC before the -            // 'FirstCallInSCC' barrier. -            if (SCC.size() == 1) { -              std::swap(CallSites[CSi], CallSites.back()); -              CallSites.pop_back(); -            } else { -              CallSites.erase(CallSites.begin()+CSi); -            } -            --CSi; - -            ++NumInlined; -            Changed = true; -            LocalChange = true; -          } -        } +    for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { +      CallSite CS = CallSites[CSi]; +       +      Function *Callee = CS.getCalledFunction(); +      // We can only inline direct calls to non-declarations. +      if (Callee == 0 || Callee->isDeclaration()) continue; +       +      // If the policy determines that we should inline this function, +      // try to do so. +      if (!shouldInline(CS)) +        continue; + +      Function *Caller = CS.getCaller(); +      // Attempt to inline the function... +      if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas)) +        continue; +       +      // If we inlined the last possible call site to the function, delete the +      // function body now. +      if (Callee->use_empty() && Callee->hasLocalLinkage() && +          // TODO: Can remove if in SCC now. +          !SCCFunctions.count(Callee) && +           +          // The function may be apparently dead, but if there are indirect +          // callgraph references to the node, we cannot delete it yet, this +          // could invalidate the CGSCC iterator. +          CG[Callee]->getNumReferences() == 0) { +        DEBUG(errs() << "    -> Deleting dead function: " +              << Callee->getName() << "\n"); +        CallGraphNode *CalleeNode = CG[Callee]; +         +        // Remove any call graph edges from the callee to its callees. 
+        CalleeNode->removeAllCalledFunctions(); +         +        resetCachedCostInfo(Callee); +         +        // Removing the node for callee from the call graph and delete it. +        delete CG.removeFunctionFromModule(CalleeNode); +        ++NumDeleted;        } +       +      // Remove any cached cost info for this caller, as inlining the +      // callee has increased the size of the caller (which may be the +      // same as the callee). +      resetCachedCostInfo(Caller); + +      // Remove this call site from the list.  If possible, use  +      // swap/pop_back for efficiency, but do not use it if doing so would +      // move a call site to a function in this SCC before the +      // 'FirstCallInSCC' barrier. +      if (SCC.size() == 1) { +        std::swap(CallSites[CSi], CallSites.back()); +        CallSites.pop_back(); +      } else { +        CallSites.erase(CallSites.begin()+CSi); +      } +      --CSi; + +      ++NumInlined; +      Changed = true; +      LocalChange = true; +    }    } while (LocalChange);    return Changed; @@ -227,47 +406,55 @@ bool Inliner::doFinalization(CallGraph &CG) {    return removeDeadFunctions(CG);  } -  /// removeDeadFunctions - Remove dead functions that are not included in -  /// DNR (Do Not Remove) list. +/// removeDeadFunctions - Remove dead functions that are not included in +/// DNR (Do Not Remove) list.  bool Inliner::removeDeadFunctions(CallGraph &CG,  -                                 SmallPtrSet<const Function *, 16> *DNR) { -  std::set<CallGraphNode*> FunctionsToRemove; +                                  SmallPtrSet<const Function *, 16> *DNR) { +  SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;    // Scan for all of the functions, looking for ones that should now be removed    // from the program.  Insert the dead ones in the FunctionsToRemove set.    for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {      CallGraphNode *CGN = I->second; -    if (Function *F = CGN ? 
CGN->getFunction() : 0) { -      // If the only remaining users of the function are dead constants, remove -      // them. -      F->removeDeadConstantUsers(); - -      if (DNR && DNR->count(F)) -        continue; +    if (CGN->getFunction() == 0) +      continue; +     +    Function *F = CGN->getFunction(); +     +    // If the only remaining users of the function are dead constants, remove +    // them. +    F->removeDeadConstantUsers(); + +    if (DNR && DNR->count(F)) +      continue; +    if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && +        !F->hasAvailableExternallyLinkage()) +      continue; +    if (!F->use_empty()) +      continue; +     +    // Remove any call graph edges from the function to its callees. +    CGN->removeAllCalledFunctions(); + +    // Remove any edges from the external node to the function's call graph +    // node.  These edges might have been made irrelegant due to +    // optimization of the program. +    CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); -      if ((F->hasLinkOnceLinkage() || F->hasLocalLinkage()) && -          F->use_empty()) { - -        // Remove any call graph edges from the function to its callees. -        CGN->removeAllCalledFunctions(); - -        // Remove any edges from the external node to the function's call graph -        // node.  These edges might have been made irrelegant due to -        // optimization of the program. -        CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); - -        // Removing the node for callee from the call graph and delete it. -        FunctionsToRemove.insert(CGN); -      } -    } +    // Removing the node for callee from the call graph and delete it. +    FunctionsToRemove.insert(CGN);    }    // Now that we know which functions to delete, do so.  We didn't want to do    // this inline, because that would invalidate our CallGraph::iterator    // objects. 
:( +  // +  // Note that it doesn't matter that we are iterating over a non-stable set +  // here to do this, it doesn't matter which order the functions are deleted +  // in.    bool Changed = false; -  for (std::set<CallGraphNode*>::iterator I = FunctionsToRemove.begin(), -         E = FunctionsToRemove.end(); I != E; ++I) { +  for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(), +       E = FunctionsToRemove.end(); I != E; ++I) {      resetCachedCostInfo((*I)->getFunction());      delete CG.removeFunctionFromModule(*I);      ++NumDeleted; diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 5093ae90b5ba..e3c3c672c590 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -21,6 +21,7 @@  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/Statistic.h"  #include <fstream>  #include <set> @@ -86,7 +87,7 @@ void InternalizePass::LoadFile(const char *Filename) {    // Load the APIFile...    std::ifstream In(Filename);    if (!In.good()) { -    cerr << "WARNING: Internalize couldn't load file '" << Filename +    errs() << "WARNING: Internalize couldn't load file '" << Filename           << "'! Continuing as if it's empty.\n";      return; // Just continue as if the file were empty    } @@ -101,7 +102,7 @@ void InternalizePass::LoadFile(const char *Filename) {  bool InternalizePass::runOnModule(Module &M) {    CallGraph *CG = getAnalysisIfAvailable<CallGraph>();    CallGraphNode *ExternalNode = CG ? 
CG->getExternalCallingNode() : 0; - +      if (ExternalNames.empty()) {      // Return if we're not in 'all but main' mode and have no external api      if (!AllButMain) @@ -131,12 +132,14 @@ bool InternalizePass::runOnModule(Module &M) {        if (ExternalNode) ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);        Changed = true;        ++NumFunctions; -      DOUT << "Internalizing func " << I->getName() << "\n"; +      DEBUG(errs() << "Internalizing func " << I->getName() << "\n");      }    // Never internalize the llvm.used symbol.  It is used to implement    // attribute((used)). +  // FIXME: Shouldn't this just filter on llvm.metadata section??    ExternalNames.insert("llvm.used"); +  ExternalNames.insert("llvm.compiler.used");    // Never internalize anchors used by the machine module info, else the info    // won't find them.  (see MachineModuleInfo.) @@ -158,7 +161,7 @@ bool InternalizePass::runOnModule(Module &M) {        I->setLinkage(GlobalValue::InternalLinkage);        Changed = true;        ++NumGlobals; -      DOUT << "Internalized gvar " << I->getName() << "\n"; +      DEBUG(errs() << "Internalized gvar " << I->getName() << "\n");      }    // Mark all aliases that are not in the api as internal as well. 
@@ -169,7 +172,7 @@ bool InternalizePass::runOnModule(Module &M) {        I->setLinkage(GlobalValue::InternalLinkage);        Changed = true;        ++NumAliases; -      DOUT << "Internalized alias " << I->getName() << "\n"; +      DEBUG(errs() << "Internalized alias " << I->getName() << "\n");      }    return Changed; diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 0c654438d508..02ac3bb903c7 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -20,7 +20,7 @@  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Transforms/Scalar.h" @@ -33,23 +33,19 @@ using namespace llvm;  STATISTIC(NumExtracted, "Number of loops extracted");  namespace { -  // FIXME: This is not a function pass, but the PassManager doesn't allow -  // Module passes to require FunctionPasses, so we can't get loop info if we're -  // not a function pass. 
-  struct VISIBILITY_HIDDEN LoopExtractor : public FunctionPass { +  struct VISIBILITY_HIDDEN LoopExtractor : public LoopPass {      static char ID; // Pass identification, replacement for typeid      unsigned NumLoops;      explicit LoopExtractor(unsigned numLoops = ~0)  -      : FunctionPass(&ID), NumLoops(numLoops) {} +      : LoopPass(&ID), NumLoops(numLoops) {} -    virtual bool runOnFunction(Function &F); +    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);      virtual void getAnalysisUsage(AnalysisUsage &AU) const {        AU.addRequiredID(BreakCriticalEdgesID);        AU.addRequiredID(LoopSimplifyID);        AU.addRequired<DominatorTree>(); -      AU.addRequired<LoopInfo>();      }    };  } @@ -73,68 +69,50 @@ Y("loop-extract-single", "Extract at most one loop into a new function");  // createLoopExtractorPass - This pass extracts all natural loops from the  // program into a function if it can.  // -FunctionPass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } +Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } -bool LoopExtractor::runOnFunction(Function &F) { -  LoopInfo &LI = getAnalysis<LoopInfo>(); - -  // If this function has no loops, there is nothing to do. -  if (LI.empty()) +bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { +  // Only visit top-level loops. +  if (L->getParentLoop())      return false;    DominatorTree &DT = getAnalysis<DominatorTree>(); - -  // If there is more than one top-level loop in this function, extract all of -  // the loops.    bool Changed = false; -  if (LI.end()-LI.begin() > 1) { -    for (LoopInfo::iterator i = LI.begin(), e = LI.end(); i != e; ++i) { -      if (NumLoops == 0) return Changed; -      --NumLoops; -      Changed |= ExtractLoop(DT, *i) != 0; -      ++NumExtracted; -    } -  } else { -    // Otherwise there is exactly one top-level loop.  If this function is more -    // than a minimal wrapper around the loop, extract the loop. 
-    Loop *TLL = *LI.begin(); -    bool ShouldExtractLoop = false; - -    // Extract the loop if the entry block doesn't branch to the loop header. -    TerminatorInst *EntryTI = F.getEntryBlock().getTerminator(); -    if (!isa<BranchInst>(EntryTI) || -        !cast<BranchInst>(EntryTI)->isUnconditional() || -        EntryTI->getSuccessor(0) != TLL->getHeader()) -      ShouldExtractLoop = true; -    else { -      // Check to see if any exits from the loop are more than just return -      // blocks. -      SmallVector<BasicBlock*, 8> ExitBlocks; -      TLL->getExitBlocks(ExitBlocks); -      for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) -        if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) { -          ShouldExtractLoop = true; -          break; -        } -    } -    if (ShouldExtractLoop) { -      if (NumLoops == 0) return Changed; -      --NumLoops; -      Changed |= ExtractLoop(DT, TLL) != 0; -      ++NumExtracted; -    } else { -      // Okay, this function is a minimal container around the specified loop. -      // If we extract the loop, we will continue to just keep extracting it -      // infinitely... so don't extract it.  However, if the loop contains any -      // subloops, extract them. -      for (Loop::iterator i = TLL->begin(), e = TLL->end(); i != e; ++i) { -        if (NumLoops == 0) return Changed; -        --NumLoops; -        Changed |= ExtractLoop(DT, *i) != 0; -        ++NumExtracted; +  // If there is more than one top-level loop in this function, extract all of +  // the loops. Otherwise there is exactly one top-level loop; in this case if +  // this function is more than a minimal wrapper around the loop, extract +  // the loop. +  bool ShouldExtractLoop = false; + +  // Extract the loop if the entry block doesn't branch to the loop header. 
+  TerminatorInst *EntryTI = +    L->getHeader()->getParent()->getEntryBlock().getTerminator(); +  if (!isa<BranchInst>(EntryTI) || +      !cast<BranchInst>(EntryTI)->isUnconditional() || +      EntryTI->getSuccessor(0) != L->getHeader()) +    ShouldExtractLoop = true; +  else { +    // Check to see if any exits from the loop are more than just return +    // blocks. +    SmallVector<BasicBlock*, 8> ExitBlocks; +    L->getExitBlocks(ExitBlocks); +    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) +      if (!isa<ReturnInst>(ExitBlocks[i]->getTerminator())) { +        ShouldExtractLoop = true; +        break;        } +  } +  if (ShouldExtractLoop) { +    if (NumLoops == 0) return Changed; +    --NumLoops; +    if (ExtractLoop(DT, L) != 0) { +      Changed = true; +      // After extraction, the loop is replaced by a function call, so +      // we shouldn't try to run any more loop passes on it. +      LPM.deleteLoopFromQueue(L);      } +    ++NumExtracted;    }    return Changed; @@ -143,7 +121,7 @@ bool LoopExtractor::runOnFunction(Function &F) {  // createSingleLoopExtractorPass - This pass extracts one natural loop from the  // program into a function if it can.  This is used by bugpoint.  // -FunctionPass *llvm::createSingleLoopExtractorPass() { +Pass *llvm::createSingleLoopExtractorPass() {    return new SingleLoopExtractor();  } @@ -193,8 +171,8 @@ void BlockExtractorPass::LoadFile(const char *Filename) {    // Load the BlockFile...    
std::ifstream In(Filename);    if (!In.good()) { -    cerr << "WARNING: BlockExtractor couldn't load file '" << Filename -         << "'!\n"; +    errs() << "WARNING: BlockExtractor couldn't load file '" << Filename +           << "'!\n";      return;    }    while (In) { diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index dfc040b83342..55194b34cf20 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -39,6 +39,7 @@  #include "llvm/DerivedTypes.h"  #include "llvm/Instructions.h"  #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Support/CFG.h" @@ -200,7 +201,7 @@ bool LowerSetJmp::runOnModule(Module& M) {  // This function is always successful, unless it isn't.  bool LowerSetJmp::doInitialization(Module& M)  { -  const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty); +  const Type *SBPTy = Type::getInt8PtrTy(M.getContext());    const Type *SBPPTy = PointerType::getUnqual(SBPTy);    // N.B. 
See llvm/runtime/GCCLibraries/libexception/SJLJ-Exception.h for @@ -208,33 +209,40 @@ bool LowerSetJmp::doInitialization(Module& M)    // void __llvm_sjljeh_init_setjmpmap(void**)    InitSJMap = M.getOrInsertFunction("__llvm_sjljeh_init_setjmpmap", -                                    Type::VoidTy, SBPPTy, (Type *)0); +                                    Type::getVoidTy(M.getContext()), +                                    SBPPTy, (Type *)0);    // void __llvm_sjljeh_destroy_setjmpmap(void**)    DestroySJMap = M.getOrInsertFunction("__llvm_sjljeh_destroy_setjmpmap", -                                       Type::VoidTy, SBPPTy, (Type *)0); +                                       Type::getVoidTy(M.getContext()), +                                       SBPPTy, (Type *)0);    // void __llvm_sjljeh_add_setjmp_to_map(void**, void*, unsigned)    AddSJToMap = M.getOrInsertFunction("__llvm_sjljeh_add_setjmp_to_map", -                                     Type::VoidTy, SBPPTy, SBPTy, -                                     Type::Int32Ty, (Type *)0); +                                     Type::getVoidTy(M.getContext()), +                                     SBPPTy, SBPTy, +                                     Type::getInt32Ty(M.getContext()), +                                     (Type *)0);    // void __llvm_sjljeh_throw_longjmp(int*, int)    ThrowLongJmp = M.getOrInsertFunction("__llvm_sjljeh_throw_longjmp", -                                       Type::VoidTy, SBPTy, Type::Int32Ty, +                                       Type::getVoidTy(M.getContext()), SBPTy,  +                                       Type::getInt32Ty(M.getContext()),                                         (Type *)0);    // unsigned __llvm_sjljeh_try_catching_longjmp_exception(void **)    TryCatchLJ =      M.getOrInsertFunction("__llvm_sjljeh_try_catching_longjmp_exception", -                          Type::Int32Ty, SBPPTy, (Type *)0); +                          Type::getInt32Ty(M.getContext()), SBPPTy, (Type 
*)0);    // bool __llvm_sjljeh_is_longjmp_exception()    IsLJException = M.getOrInsertFunction("__llvm_sjljeh_is_longjmp_exception", -                                        Type::Int1Ty, (Type *)0); +                                        Type::getInt1Ty(M.getContext()), +                                        (Type *)0);    // int __llvm_sjljeh_get_longjmp_value()    GetLJValue = M.getOrInsertFunction("__llvm_sjljeh_get_longjmp_value", -                                     Type::Int32Ty, (Type *)0); +                                     Type::getInt32Ty(M.getContext()), +                                     (Type *)0);    return true;  } @@ -257,7 +265,8 @@ bool LowerSetJmp::IsTransformableFunction(const std::string& Name) {  // throwing the exception for us.  void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)  { -  const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty); +  const Type* SBPTy = +        Type::getInt8PtrTy(Inst->getContext());    // Create the call to "__llvm_sjljeh_throw_longjmp". This takes the    // same parameters as "longjmp", except that the buffer is cast to a @@ -278,7 +287,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)    if (SVP.first)      BranchInst::Create(SVP.first->getParent(), Inst);    else -    new UnwindInst(Inst); +    new UnwindInst(Inst->getContext(), Inst);    // Remove all insts after the branch/unwind inst.  Go from back to front to    // avoid replaceAllUsesWith if possible. @@ -309,7 +318,8 @@ AllocaInst* LowerSetJmp::GetSetJmpMap(Function* Func)    assert(Inst && "Couldn't find even ONE instruction in entry block!");    // Fill in the alloca and call to initialize the SJ map. 
-  const Type *SBPTy = PointerType::getUnqual(Type::Int8Ty); +  const Type *SBPTy = +        Type::getInt8PtrTy(Func->getContext());    AllocaInst* Map = new AllocaInst(SBPTy, 0, "SJMap", Inst);    CallInst::Create(InitSJMap, Map, "", Inst);    return SJMap[Func] = Map; @@ -324,12 +334,13 @@ BasicBlock* LowerSetJmp::GetRethrowBB(Function* Func)    // The basic block we're going to jump to if we need to rethrow the    // exception. -  BasicBlock* Rethrow = BasicBlock::Create("RethrowExcept", Func); +  BasicBlock* Rethrow = +        BasicBlock::Create(Func->getContext(), "RethrowExcept", Func);    // Fill in the "Rethrow" BB with a call to rethrow the exception. This    // is the last instruction in the BB since at this point the runtime    // should exit this function and go to the next function. -  new UnwindInst(Rethrow); +  new UnwindInst(Func->getContext(), Rethrow);    return RethrowBBMap[Func] = Rethrow;  } @@ -340,7 +351,8 @@ LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,  {    if (SwitchValMap[Func].first) return SwitchValMap[Func]; -  BasicBlock* LongJmpPre = BasicBlock::Create("LongJmpBlkPre", Func); +  BasicBlock* LongJmpPre = +        BasicBlock::Create(Func->getContext(), "LongJmpBlkPre", Func);    // Keep track of the preliminary basic block for some of the other    // transformations. @@ -352,7 +364,8 @@ LowerSetJmp::SwitchValuePair LowerSetJmp::GetSJSwitch(Function* Func,    // The "decision basic block" gets the number associated with the    // setjmp call returning to switch on and the value returned by    // longjmp. -  BasicBlock* DecisionBB = BasicBlock::Create("LJDecisionBB", Func); +  BasicBlock* DecisionBB = +        BasicBlock::Create(Func->getContext(), "LJDecisionBB", Func);    BranchInst::Create(DecisionBB, Rethrow, Cond, LongJmpPre); @@ -375,12 +388,13 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)    Function* Func = ABlock->getParent();    // Add this setjmp to the setjmp map. 
-  const Type* SBPTy = PointerType::getUnqual(Type::Int8Ty); +  const Type* SBPTy = +          Type::getInt8PtrTy(Inst->getContext());    CastInst* BufPtr =       new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst);    std::vector<Value*> Args =       make_vector<Value*>(GetSetJmpMap(Func), BufPtr, -                        ConstantInt::get(Type::Int32Ty, +                        ConstantInt::get(Type::getInt32Ty(Inst->getContext()),                                           SetJmpIDMap[Func]++), 0);    CallInst::Create(AddSJToMap, Args.begin(), Args.end(), "", Inst); @@ -424,14 +438,17 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)    // This PHI node will be in the new block created from the    // splitBasicBlock call. -  PHINode* PHI = PHINode::Create(Type::Int32Ty, "SetJmpReturn", Inst); +  PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), +                                 "SetJmpReturn", Inst);    // Coming from a call to setjmp, the return is 0. -  PHI->addIncoming(ConstantInt::getNullValue(Type::Int32Ty), ABlock); +  PHI->addIncoming(Constant::getNullValue(Type::getInt32Ty(Inst->getContext())), +                   ABlock);    // Add the case for this setjmp's number...    SwitchValuePair SVP = GetSJSwitch(Func, GetRethrowBB(Func)); -  SVP.first->addCase(ConstantInt::get(Type::Int32Ty, SetJmpIDMap[Func] - 1), +  SVP.first->addCase(ConstantInt::get(Type::getInt32Ty(Inst->getContext()), +                                      SetJmpIDMap[Func] - 1),                       SetJmpContBlock);    // Value coming from the handling of the exception. 
@@ -503,7 +520,8 @@ void LowerSetJmp::visitInvokeInst(InvokeInst& II)    BasicBlock* ExceptBB = II.getUnwindDest();    Function* Func = BB->getParent(); -  BasicBlock* NewExceptBB = BasicBlock::Create("InvokeExcept", Func); +  BasicBlock* NewExceptBB = BasicBlock::Create(II.getContext(),  +                                               "InvokeExcept", Func);    // If this is a longjmp exception, then branch to the preliminary BB of    // the longjmp exception handling. Otherwise, go to the old exception. diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 5693cc0fc3b4..13bbf9c682e4 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -47,11 +47,14 @@  #include "llvm/Constants.h"  #include "llvm/InlineAsm.h"  #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Support/CallSite.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h"  #include <map>  #include <vector>  using namespace llvm; @@ -61,7 +64,7 @@ STATISTIC(NumFunctionsMerged, "Number of functions merged");  namespace {    struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass {      static char ID; // Pass identification, replacement for typeid -    MergeFunctions() : ModulePass((intptr_t)&ID) {} +    MergeFunctions() : ModulePass(&ID) {}      bool runOnModule(Module &M);    }; @@ -127,7 +130,7 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {      return false;    default: -    assert(0 && "Unknown type!"); +    llvm_unreachable("Unknown type!");      return false;    case Type::PointerTyID: { @@ -185,7 +188,8 @@ static bool  isEquivalentOperation(const Instruction *I1, const Instruction *I2) {    if (I1->getOpcode() != I2->getOpcode() ||        I1->getNumOperands() != I2->getNumOperands() || -      
!isEquivalentType(I1->getType(), I2->getType())) +      !isEquivalentType(I1->getType(), I2->getType()) || +      !I1->hasSameSubclassOptionalData(I2))      return false;    // We have two instructions of identical opcode and #operands.  Check to see @@ -449,6 +453,7 @@ static LinkageCategory categorize(const Function *F) {    switch (F->getLinkage()) {    case GlobalValue::InternalLinkage:    case GlobalValue::PrivateLinkage: +  case GlobalValue::LinkerPrivateLinkage:      return Internal;    case GlobalValue::WeakAnyLinkage: @@ -468,14 +473,14 @@ static LinkageCategory categorize(const Function *F) {      return ExternalStrong;    } -  assert(0 && "Unknown LinkageType."); +  llvm_unreachable("Unknown LinkageType.");    return ExternalWeak;  }  static void ThunkGToF(Function *F, Function *G) {    Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",                                      G->getParent()); -  BasicBlock *BB = BasicBlock::Create("", NewG); +  BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);    std::vector<Value *> Args;    unsigned i = 0; @@ -494,13 +499,13 @@ static void ThunkGToF(Function *F, Function *G) {    CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);    CI->setTailCall();    CI->setCallingConv(F->getCallingConv()); -  if (NewG->getReturnType() == Type::VoidTy) { -    ReturnInst::Create(BB); +  if (NewG->getReturnType() == Type::getVoidTy(F->getContext())) { +    ReturnInst::Create(F->getContext(), BB);    } else if (CI->getType() != NewG->getReturnType()) {      Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB); -    ReturnInst::Create(BCI, BB); +    ReturnInst::Create(F->getContext(), BCI, BB);    } else { -    ReturnInst::Create(CI, BB); +    ReturnInst::Create(F->getContext(), CI, BB);    }    NewG->copyAttributesFrom(G); @@ -574,22 +579,22 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {      case Internal:        switch (catG) {          
case ExternalStrong: -          assert(0); +          llvm_unreachable(0);            // fall-through          case ExternalWeak: -	  if (F->hasAddressTaken()) +          if (F->hasAddressTaken())              ThunkGToF(F, G);            else              AliasGToF(F, G); -	  break; +          break;          case Internal: {            bool addrTakenF = F->hasAddressTaken();            bool addrTakenG = G->hasAddressTaken();            if (!addrTakenF && addrTakenG) {              std::swap(FnVec[i], FnVec[j]);              std::swap(F, G); -	    std::swap(addrTakenF, addrTakenG); -	  } +            std::swap(addrTakenF, addrTakenG); +          }            if (addrTakenF && addrTakenG) {              ThunkGToF(F, G); @@ -597,7 +602,7 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {              assert(!addrTakenG);              AliasGToF(F, G);            } -	} break; +        } break;        }        break;    } @@ -629,19 +634,19 @@ bool MergeFunctions::runOnModule(Module &M) {    bool LocalChanged;    do {      LocalChanged = false; -    DOUT << "size: " << FnMap.size() << "\n"; +    DEBUG(errs() << "size: " << FnMap.size() << "\n");      for (std::map<unsigned long, std::vector<Function *> >::iterator           I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {        std::vector<Function *> &FnVec = I->second; -      DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n"; +      DEBUG(errs() << "hash (" << I->first << "): " << FnVec.size() << "\n");        for (int i = 0, e = FnVec.size(); i != e; ++i) {          for (int j = i + 1; j != e; ++j) {            bool isEqual = equals(FnVec[i], FnVec[j]); -          DOUT << "  " << FnVec[i]->getName() -               << (isEqual ? " == " : " != ") -               << FnVec[j]->getName() << "\n"; +          DEBUG(errs() << "  " << FnVec[i]->getName() +                << (isEqual ? 
" == " : " != ") +                << FnVec[j]->getName() << "\n");            if (isEqual) {              if (fold(FnVec, i, j)) { diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 73ec9c107637..8f858d35ea3f 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -48,7 +48,8 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }  Function* PartialInliner::unswitchFunction(Function* F) {    // First, verify that this function is an unswitching candidate...    BasicBlock* entryBlock = F->begin(); -  if (!isa<BranchInst>(entryBlock->getTerminator())) +  BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator()); +  if (!BR || BR->isUnconditional())      return 0;    BasicBlock* returnBlock = 0; diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 2b52f464b674..daf81e9259da 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -19,6 +19,7 @@  #include "llvm/CallGraphSCCPass.h"  #include "llvm/Constants.h"  #include "llvm/Function.h" +#include "llvm/LLVMContext.h"  #include "llvm/Instructions.h"  #include "llvm/IntrinsicInst.h"  #include "llvm/Analysis/CallGraph.h" @@ -40,7 +41,7 @@ namespace {      PruneEH() : CallGraphSCCPass(&ID) {}      // runOnSCC - Analyze the SCC, performing the transformation if possible. 
-    bool runOnSCC(const std::vector<CallGraphNode *> &SCC); +    bool runOnSCC(std::vector<CallGraphNode *> &SCC);      bool SimplifyFunction(Function *F);      void DeleteBasicBlock(BasicBlock *BB); @@ -54,7 +55,7 @@ X("prune-eh", "Remove unused exception handling info");  Pass *llvm::createPruneEHPass() { return new PruneEH(); } -bool PruneEH::runOnSCC(const std::vector<CallGraphNode *> &SCC) { +bool PruneEH::runOnSCC(std::vector<CallGraphNode *> &SCC) {    SmallPtrSet<CallGraphNode *, 8> SCCNodes;    CallGraph &CG = getAnalysis<CallGraph>();    bool MadeChange = false; @@ -164,9 +165,6 @@ bool PruneEH::runOnSCC(const std::vector<CallGraphNode *> &SCC) {  // function if we have invokes to non-unwinding functions or code after calls to  // no-return functions.  bool PruneEH::SimplifyFunction(Function *F) { -  CallGraph &CG = getAnalysis<CallGraph>(); -  CallGraphNode *CGN = CG[F]; -    bool MadeChange = false;    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {      if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) @@ -180,14 +178,13 @@ bool PruneEH::SimplifyFunction(Function *F) {          Call->setAttributes(II->getAttributes());          // Anything that used the value produced by the invoke instruction -        // now uses the value produced by the call instruction. +        // now uses the value produced by the call instruction.  Note that we +        // do this even for void functions and calls with no uses so that the +        // callgraph edge is updated.          II->replaceAllUsesWith(Call);          BasicBlock *UnwindBlock = II->getUnwindDest();          UnwindBlock->removePredecessor(II->getParent()); -        // Fix up the call graph. -        CGN->replaceCallSite(II, Call); -          // Insert a branch to the normal destination right before the          // invoke.          
BranchInst::Create(II->getNormalDest(), II); @@ -214,7 +211,7 @@ bool PruneEH::SimplifyFunction(Function *F) {            // Remove the uncond branch and add an unreachable.            BB->getInstList().pop_back(); -          new UnreachableInst(BB); +          new UnreachableInst(BB->getContext(), BB);            DeleteBasicBlock(New);  // Delete the new BB.            MadeChange = true; diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp index 99003689fb1f..4c1f26d50d30 100644 --- a/lib/Transforms/IPO/RaiseAllocations.cpp +++ b/lib/Transforms/IPO/RaiseAllocations.cpp @@ -16,6 +16,7 @@  #include "llvm/Transforms/IPO.h"  #include "llvm/Constants.h"  #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Instructions.h"  #include "llvm/Pass.h" @@ -69,7 +70,6 @@ ModulePass *llvm::createRaiseAllocationsPass() {  // function into the appropriate instruction.  //  void RaiseAllocations::doInitialization(Module &M) { -    // Get Malloc and free prototypes if they exist!    
MallocFunc = M.getFunction("malloc");    if (MallocFunc) { @@ -77,22 +77,27 @@ void RaiseAllocations::doInitialization(Module &M) {      // Get the expected prototype for malloc      const FunctionType *Malloc1Type =  -      FunctionType::get(PointerType::getUnqual(Type::Int8Ty), -                      std::vector<const Type*>(1, Type::Int64Ty), false); +      FunctionType::get(Type::getInt8PtrTy(M.getContext()), +                      std::vector<const Type*>(1, +                                      Type::getInt64Ty(M.getContext())), false);      // Chck to see if we got the expected malloc      if (TyWeHave != Malloc1Type) {        // Check to see if the prototype is wrong, giving us i8*(i32) * malloc        // This handles the common declaration of: 'void *malloc(unsigned);'        const FunctionType *Malloc2Type =  -        FunctionType::get(PointerType::getUnqual(Type::Int8Ty), -                          std::vector<const Type*>(1, Type::Int32Ty), false); +        FunctionType::get(PointerType::getUnqual( +                          Type::getInt8Ty(M.getContext())), +                          std::vector<const Type*>(1,  +                                      Type::getInt32Ty(M.getContext())), false);        if (TyWeHave != Malloc2Type) {          // Check to see if the prototype is missing, giving us           // i8*(...) 
* malloc          // This handles the common declaration of: 'void *malloc();'          const FunctionType *Malloc3Type =  -          FunctionType::get(PointerType::getUnqual(Type::Int8Ty), true); +          FunctionType::get(PointerType::getUnqual( +                                    Type::getInt8Ty(M.getContext())),  +                                    true);          if (TyWeHave != Malloc3Type)            // Give up            MallocFunc = 0; @@ -105,19 +110,24 @@ void RaiseAllocations::doInitialization(Module &M) {      const FunctionType* TyWeHave = FreeFunc->getFunctionType();      // Get the expected prototype for void free(i8*) -    const FunctionType *Free1Type = FunctionType::get(Type::VoidTy, -      std::vector<const Type*>(1, PointerType::getUnqual(Type::Int8Ty)), false); +    const FunctionType *Free1Type = +      FunctionType::get(Type::getVoidTy(M.getContext()), +        std::vector<const Type*>(1, PointerType::getUnqual( +                                 Type::getInt8Ty(M.getContext()))),  +                                 false);      if (TyWeHave != Free1Type) {        // Check to see if the prototype was forgotten, giving us         // void (...) * free        // This handles the common forward declaration of: 'void free();' -      const FunctionType* Free2Type = FunctionType::get(Type::VoidTy, true); +      const FunctionType* Free2Type = +                    FunctionType::get(Type::getVoidTy(M.getContext()), true);        if (TyWeHave != Free2Type) {          // One last try, check to see if we can find free as           // int (...)* free.  This handles the case where NOTHING was declared. -        const FunctionType* Free3Type = FunctionType::get(Type::Int32Ty, true); +        const FunctionType* Free3Type = +                    FunctionType::get(Type::getInt32Ty(M.getContext()), true);          if (TyWeHave != Free3Type) {            // Give up. 
@@ -137,7 +147,7 @@ void RaiseAllocations::doInitialization(Module &M) {  bool RaiseAllocations::runOnModule(Module &M) {    // Find the malloc/free prototypes...    doInitialization(M); - +      bool Changed = false;    // First, process all of the malloc calls... @@ -159,12 +169,15 @@ bool RaiseAllocations::runOnModule(Module &M) {            // If no prototype was provided for malloc, we may need to cast the            // source size. -          if (Source->getType() != Type::Int32Ty) +          if (Source->getType() != Type::getInt32Ty(M.getContext()))              Source =  -              CastInst::CreateIntegerCast(Source, Type::Int32Ty, false/*ZExt*/, +              CastInst::CreateIntegerCast(Source,  +                                          Type::getInt32Ty(M.getContext()),  +                                          false/*ZExt*/,                                            "MallocAmtCast", I); -          MallocInst *MI = new MallocInst(Type::Int8Ty, Source, "", I); +          MallocInst *MI = new MallocInst(Type::getInt8Ty(M.getContext()), +                                          Source, "", I);            MI->takeName(I);            I->replaceAllUsesWith(MI); @@ -216,7 +229,7 @@ bool RaiseAllocations::runOnModule(Module &M) {            Value *Source = *CS.arg_begin();            if (!isa<PointerType>(Source->getType()))              Source = new IntToPtrInst(Source,            -                                      PointerType::getUnqual(Type::Int8Ty),  +                        Type::getInt8PtrTy(M.getContext()),                                         "FreePtrCast", I);            new FreeInst(Source, I); @@ -226,7 +239,7 @@ bool RaiseAllocations::runOnModule(Module &M) {              BranchInst::Create(II->getNormalDest(), I);            // Delete the old call site -          if (I->getType() != Type::VoidTy) +          if (I->getType() != Type::getVoidTy(M.getContext()))              I->replaceAllUsesWith(UndefValue::get(I->getType()));           
 I->eraseFromParent();            Changed = true; diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 046e0441b1dc..77d44b27e208 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -24,18 +24,18 @@  #include "llvm/Constants.h"  #include "llvm/DerivedTypes.h"  #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Analysis/DebugInfo.h"  #include "llvm/ValueSymbolTable.h"  #include "llvm/TypeSymbolTable.h"  #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Compiler.h"  #include "llvm/ADT/SmallPtrSet.h"  using namespace llvm;  namespace { -  class VISIBILITY_HIDDEN StripSymbols : public ModulePass { +  class StripSymbols : public ModulePass {      bool OnlyDebugInfo;    public:      static char ID; // Pass identification, replacement for typeid @@ -49,7 +49,7 @@ namespace {      }    }; -  class VISIBILITY_HIDDEN StripNonDebugSymbols : public ModulePass { +  class StripNonDebugSymbols : public ModulePass {    public:      static char ID; // Pass identification, replacement for typeid      explicit StripNonDebugSymbols() @@ -62,7 +62,7 @@ namespace {      }    }; -  class VISIBILITY_HIDDEN StripDebugDeclare : public ModulePass { +  class StripDebugDeclare : public ModulePass {    public:      static char ID; // Pass identification, replacement for typeid      explicit StripDebugDeclare() @@ -138,7 +138,7 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {      Value *V = VI->getValue();      ++VI;      if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) { -      if (!PreserveDbgInfo || strncmp(V->getNameStart(), "llvm.dbg", 8)) +      if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))          // Set name to "", removing from symbol table!          
V->setName("");      } @@ -156,43 +156,37 @@ static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) {  }  /// Find values that are marked as llvm.used. -void findUsedValues(Module &M, -                    SmallPtrSet<const GlobalValue*, 8>& llvmUsedValues) { -  if (GlobalVariable *LLVMUsed = M.getGlobalVariable("llvm.used")) { -    llvmUsedValues.insert(LLVMUsed); -    // Collect values that are preserved as per explicit request. -    // llvm.used is used to list these values. -    if (ConstantArray *Inits =  -        dyn_cast<ConstantArray>(LLVMUsed->getInitializer())) { -      for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { -        if (GlobalValue *GV = dyn_cast<GlobalValue>(Inits->getOperand(i))) -          llvmUsedValues.insert(GV); -        else if (ConstantExpr *CE = -                 dyn_cast<ConstantExpr>(Inits->getOperand(i))) -          if (CE->getOpcode() == Instruction::BitCast) -            if (GlobalValue *GV = dyn_cast<GlobalValue>(CE->getOperand(0))) -              llvmUsedValues.insert(GV); -      } -    } -  } +static void findUsedValues(GlobalVariable *LLVMUsed, +                           SmallPtrSet<const GlobalValue*, 8> &UsedValues) { +  if (LLVMUsed == 0) return; +  UsedValues.insert(LLVMUsed); +   +  ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer()); +  if (Inits == 0) return; +   +  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) +    if (GlobalValue *GV =  +          dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts())) +      UsedValues.insert(GV);  }  /// StripSymbolNames - Strip symbol names. 
-bool StripSymbolNames(Module &M, bool PreserveDbgInfo) { +static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {    SmallPtrSet<const GlobalValue*, 8> llvmUsedValues; -  findUsedValues(M, llvmUsedValues); +  findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues); +  findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);    for (Module::global_iterator I = M.global_begin(), E = M.global_end();         I != E; ++I) {      if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) -      if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8)) +      if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))          I->setName("");     // Internal symbols can't participate in linkage    }    for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {      if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0) -      if (!PreserveDbgInfo || strncmp(I->getNameStart(), "llvm.dbg", 8)) +      if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))          I->setName("");     // Internal symbols can't participate in linkage      StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);    } @@ -206,169 +200,58 @@ bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {  // StripDebugInfo - Strip debug info in the module if it exists.    // To do this, we remove llvm.dbg.func.start, llvm.dbg.stoppoint, and   // llvm.dbg.region.end calls, and any globals they point to if now dead. -bool StripDebugInfo(Module &M) { - -  SmallPtrSet<const GlobalValue*, 8> llvmUsedValues; -  findUsedValues(M, llvmUsedValues); - -  SmallVector<GlobalVariable *, 2> CUs; -  SmallVector<GlobalVariable *, 4> GVs; -  SmallVector<GlobalVariable *, 4> SPs; -  CollectDebugInfoAnchors(M, CUs, GVs, SPs); -  // These anchors use LinkOnce linkage so that the optimizer does not -  // remove them accidently. Set InternalLinkage for all these debug -  // info anchors. 
-  for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(), -         E = CUs.end(); I != E; ++I) -    (*I)->setLinkage(GlobalValue::InternalLinkage); -  for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(), -         E = GVs.end(); I != E; ++I) -    (*I)->setLinkage(GlobalValue::InternalLinkage); -  for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(), -         E = SPs.end(); I != E; ++I) -    (*I)->setLinkage(GlobalValue::InternalLinkage); - - - // Delete all dbg variables. -  for (Module::global_iterator I = M.global_begin(), E = M.global_end();  -       I != E; ++I) { -    GlobalVariable *GV = dyn_cast<GlobalVariable>(I); -    if (!GV) continue; -    if (!GV->use_empty() && llvmUsedValues.count(I) == 0) { -      if (strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0) { -        GV->replaceAllUsesWith(UndefValue::get(GV->getType())); -      } -    } -  } +static bool StripDebugInfo(Module &M) { +  // Remove all of the calls to the debugger intrinsics, and remove them from +  // the module.    Function *FuncStart = M.getFunction("llvm.dbg.func.start");    Function *StopPoint = M.getFunction("llvm.dbg.stoppoint");    Function *RegionStart = M.getFunction("llvm.dbg.region.start");    Function *RegionEnd = M.getFunction("llvm.dbg.region.end");    Function *Declare = M.getFunction("llvm.dbg.declare"); -  std::vector<Constant*> DeadConstants; - -  // Remove all of the calls to the debugger intrinsics, and remove them from -  // the module.    
if (FuncStart) {      while (!FuncStart->use_empty()) {        CallInst *CI = cast<CallInst>(FuncStart->use_back()); -      Value *Arg = CI->getOperand(1); -      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");        CI->eraseFromParent(); -      if (Arg->use_empty()) -        if (Constant *C = dyn_cast<Constant>(Arg))  -          DeadConstants.push_back(C);      }      FuncStart->eraseFromParent();    }    if (StopPoint) {      while (!StopPoint->use_empty()) {        CallInst *CI = cast<CallInst>(StopPoint->use_back()); -      Value *Arg = CI->getOperand(3); -      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");        CI->eraseFromParent(); -      if (Arg->use_empty()) -        if (Constant *C = dyn_cast<Constant>(Arg))  -          DeadConstants.push_back(C);      }      StopPoint->eraseFromParent();    }    if (RegionStart) {      while (!RegionStart->use_empty()) {        CallInst *CI = cast<CallInst>(RegionStart->use_back()); -      Value *Arg = CI->getOperand(1); -      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");        CI->eraseFromParent(); -      if (Arg->use_empty()) -        if (Constant *C = dyn_cast<Constant>(Arg))  -          DeadConstants.push_back(C);      }      RegionStart->eraseFromParent();    }    if (RegionEnd) {      while (!RegionEnd->use_empty()) {        CallInst *CI = cast<CallInst>(RegionEnd->use_back()); -      Value *Arg = CI->getOperand(1); -      assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");        CI->eraseFromParent(); -      if (Arg->use_empty()) -        if (Constant *C = dyn_cast<Constant>(Arg))  -          DeadConstants.push_back(C);      }      RegionEnd->eraseFromParent();    }    if (Declare) {      while (!Declare->use_empty()) {        CallInst *CI = cast<CallInst>(Declare->use_back()); -      Value *Arg1 = CI->getOperand(1); -      Value *Arg2 = CI->getOperand(2); -      assert(CI->use_empty() && "llvm.dbg intrinsic 
should have void result");        CI->eraseFromParent(); -      if (Arg1->use_empty()) { -        if (Constant *C = dyn_cast<Constant>(Arg1))  -          DeadConstants.push_back(C); -        else  -          RecursivelyDeleteTriviallyDeadInstructions(Arg1); -      } -      if (Arg2->use_empty()) -        if (Constant *C = dyn_cast<Constant>(Arg2))  -          DeadConstants.push_back(C);      }      Declare->eraseFromParent();    } -  // llvm.dbg.compile_units and llvm.dbg.subprograms are marked as linkonce -  // but since we are removing all debug information, make them internal now. -  // FIXME: Use private linkage maybe? -  if (Constant *C = M.getNamedGlobal("llvm.dbg.compile_units")) -    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) -      GV->setLinkage(GlobalValue::InternalLinkage); - -  if (Constant *C = M.getNamedGlobal("llvm.dbg.subprograms")) -    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) -      GV->setLinkage(GlobalValue::InternalLinkage); -  -  if (Constant *C = M.getNamedGlobal("llvm.dbg.global_variables")) -    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) -      GV->setLinkage(GlobalValue::InternalLinkage); - -  // Delete all dbg variables. -  for (Module::global_iterator I = M.global_begin(), E = M.global_end();  -       I != E; ++I) { -    GlobalVariable *GV = dyn_cast<GlobalVariable>(I); -    if (!GV) continue; -    if (GV->use_empty() && llvmUsedValues.count(I) == 0 -        && (!GV->hasSection()  -            || strcmp(GV->getSection().c_str(), "llvm.metadata") == 0)) -      DeadConstants.push_back(GV); -  } - -  if (DeadConstants.empty()) -    return false; +  NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); +  if (NMD) +    NMD->eraseFromParent(); -  // Delete any internal globals that were only used by the debugger intrinsics. 
-  while (!DeadConstants.empty()) { -    Constant *C = DeadConstants.back(); -    DeadConstants.pop_back(); -    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { -      if (GV->hasLocalLinkage()) -        RemoveDeadConstant(GV); -    } -    else -      RemoveDeadConstant(C); -  } - -  // Remove all llvm.dbg types. -  TypeSymbolTable &ST = M.getTypeSymbolTable(); -  for (TypeSymbolTable::iterator TI = ST.begin(), TE = ST.end(); TI != TE; ) { -    if (!strncmp(TI->first.c_str(), "llvm.dbg.", 9)) -      ST.remove(TI++); -    else  -      ++TI; -  } -   +  // Remove dead metadata. +  M.getContext().RemoveDeadMetadata();    return true;  } @@ -414,8 +297,7 @@ bool StripDebugDeclare::runOnModule(Module &M) {         I != E; ++I) {      GlobalVariable *GV = dyn_cast<GlobalVariable>(I);      if (!GV) continue; -    if (GV->use_empty() && GV->hasName()  -        && strncmp(GV->getNameStart(), "llvm.dbg.global_variable", 24) == 0) +    if (GV->use_empty() && GV->getName().startswith("llvm.dbg.global_variable"))        DeadConstants.push_back(GV);    } diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp index 9f54388aa45e..4442820a284b 100644 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -23,6 +23,7 @@  #include "llvm/Transforms/IPO.h"  #include "llvm/Constants.h"  #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/CallGraphSCCPass.h"  #include "llvm/Instructions.h" @@ -34,6 +35,7 @@  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h"  using namespace llvm;  STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses"); @@ -47,15 +49,15 @@ namespace {        CallGraphSCCPass::getAnalysisUsage(AU);      } -    virtual bool runOnSCC(const std::vector<CallGraphNode *> &SCC); +    virtual bool 
runOnSCC(std::vector<CallGraphNode *> &SCC);      static char ID; // Pass identification, replacement for typeid      SRETPromotion() : CallGraphSCCPass(&ID) {}    private: -    bool PromoteReturn(CallGraphNode *CGN); +    CallGraphNode *PromoteReturn(CallGraphNode *CGN);      bool isSafeToUpdateAllCallers(Function *F);      Function *cloneFunctionBody(Function *F, const StructType *STy); -    void updateCallSites(Function *F, Function *NF); +    CallGraphNode *updateCallSites(Function *F, Function *NF);      bool nestedStructType(const StructType *STy);    };  } @@ -68,49 +70,54 @@ Pass *llvm::createStructRetPromotionPass() {    return new SRETPromotion();  } -bool SRETPromotion::runOnSCC(const std::vector<CallGraphNode *> &SCC) { +bool SRETPromotion::runOnSCC(std::vector<CallGraphNode *> &SCC) {    bool Changed = false;    for (unsigned i = 0, e = SCC.size(); i != e; ++i) -    Changed |= PromoteReturn(SCC[i]); +    if (CallGraphNode *NewNode = PromoteReturn(SCC[i])) { +      SCC[i] = NewNode; +      Changed = true; +    }    return Changed;  }  /// PromoteReturn - This method promotes function that uses StructRet paramater  -/// into a function that uses mulitple return value. -bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) { +/// into a function that uses multiple return values. +CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) {    Function *F = CGN->getFunction();    if (!F || F->isDeclaration() || !F->hasLocalLinkage()) -    return false; +    return 0;    // Make sure that function returns struct.    
if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn()) -    return false; +    return 0; -  DOUT << "SretPromotion: Looking at sret function " << F->getNameStart() << "\n"; +  DEBUG(errs() << "SretPromotion: Looking at sret function "  +        << F->getName() << "\n"); -  assert (F->getReturnType() == Type::VoidTy && "Invalid function return type"); +  assert(F->getReturnType() == Type::getVoidTy(F->getContext()) && +         "Invalid function return type");    Function::arg_iterator AI = F->arg_begin();    const llvm::PointerType *FArgType = dyn_cast<PointerType>(AI->getType()); -  assert (FArgType && "Invalid sret parameter type"); +  assert(FArgType && "Invalid sret parameter type");    const llvm::StructType *STy =       dyn_cast<StructType>(FArgType->getElementType()); -  assert (STy && "Invalid sret parameter element type"); +  assert(STy && "Invalid sret parameter element type");    // Check if it is ok to perform this promotion.    if (isSafeToUpdateAllCallers(F) == false) { -    DOUT << "SretPromotion: Not all callers can be updated\n"; +    DEBUG(errs() << "SretPromotion: Not all callers can be updated\n");      NumRejectedSRETUses++; -    return false; +    return 0;    } -  DOUT << "SretPromotion: sret argument will be promoted\n"; +  DEBUG(errs() << "SretPromotion: sret argument will be promoted\n");    NumSRET++;    // [1] Replace use of sret parameter  -  AllocaInst *TheAlloca = new AllocaInst (STy, NULL, "mrv",  -                                          F->getEntryBlock().begin()); +  AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv",  +                                         F->getEntryBlock().begin());    Value *NFirstArg = F->arg_begin();    NFirstArg->replaceAllUsesWith(TheAlloca); @@ -121,7 +128,7 @@ bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {        ++BI;        if (isa<ReturnInst>(I)) {          Value *NV = new LoadInst(TheAlloca, "mrv.ld", I); -        ReturnInst *NR = ReturnInst::Create(NV, I); +        
ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I);          I->replaceAllUsesWith(NR);          I->eraseFromParent();        } @@ -131,11 +138,13 @@ bool SRETPromotion::PromoteReturn(CallGraphNode *CGN) {    Function *NF = cloneFunctionBody(F, STy);    // [4] Update all call sites to use new function -  updateCallSites(F, NF); +  CallGraphNode *NF_CFN = updateCallSites(F, NF); -  F->eraseFromParent(); -  getAnalysis<CallGraph>().changeFunction(F, NF); -  return true; +  CallGraph &CG = getAnalysis<CallGraph>(); +  NF_CFN->stealCalledFunctionsFrom(CG[F]); + +  delete CG.removeFunctionFromModule(F); +  return NF_CFN;  }  // Check if it is ok to perform this promotion. @@ -243,23 +252,26 @@ Function *SRETPromotion::cloneFunctionBody(Function *F,    Function::arg_iterator NI = NF->arg_begin();    ++I;    while (I != E) { -      I->replaceAllUsesWith(NI); -      NI->takeName(I); -      ++I; -      ++NI; +    I->replaceAllUsesWith(NI); +    NI->takeName(I); +    ++I; +    ++NI;    }    return NF;  }  /// updateCallSites - Update all sites that call F to use NF. -void SRETPromotion::updateCallSites(Function *F, Function *NF) { +CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {    CallGraph &CG = getAnalysis<CallGraph>();    SmallVector<Value*, 16> Args;    // Attributes - Keep track of the parameter attributes for the arguments.    SmallVector<AttributeWithIndex, 8> ArgAttrsVec; +  // Get a new callgraph node for NF. +  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); +    while (!F->use_empty()) {      CallSite CS = CallSite::get(*F->use_begin());      Instruction *Call = CS.getInstruction(); @@ -309,8 +321,10 @@ void SRETPromotion::updateCallSites(Function *F, Function *NF) {      New->takeName(Call);      // Update the callgraph to know that the callsite has been transformed. 
-    CG[Call->getParent()->getParent()]->replaceCallSite(Call, New); - +    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; +    CalleeNode->removeCallEdgeFor(Call); +    CalleeNode->addCalledFunction(New, NF_CGN); +          // Update all users of sret parameter to extract value using extractvalue.      for (Value::use_iterator UI = FirstCArg->use_begin(),              UE = FirstCArg->use_end(); UI != UE; ) { @@ -318,24 +332,25 @@ void SRETPromotion::updateCallSites(Function *F, Function *NF) {        CallInst *C2 = dyn_cast<CallInst>(U2);        if (C2 && (C2 == Call))          continue; -      else if (GetElementPtrInst *UGEP = dyn_cast<GetElementPtrInst>(U2)) { -        ConstantInt *Idx = dyn_cast<ConstantInt>(UGEP->getOperand(2)); -        assert (Idx && "Unexpected getelementptr index!"); -        Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), -                                             "evi", UGEP); -        while(!UGEP->use_empty()) { -          // isSafeToUpdateAllCallers has checked that all GEP uses are -          // LoadInsts -          LoadInst *L = cast<LoadInst>(*UGEP->use_begin()); -          L->replaceAllUsesWith(GR); -          L->eraseFromParent(); -        } -        UGEP->eraseFromParent(); +       +      GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2); +      ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2)); +      Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), +                                           "evi", UGEP); +      while(!UGEP->use_empty()) { +        // isSafeToUpdateAllCallers has checked that all GEP uses are +        // LoadInsts +        LoadInst *L = cast<LoadInst>(*UGEP->use_begin()); +        L->replaceAllUsesWith(GR); +        L->eraseFromParent();        } -      else assert( 0 && "Unexpected sret parameter use"); +      UGEP->eraseFromParent(); +      continue;      }      Call->eraseFromParent();    } +   +  return NF_CGN;  }  /// nestedStructType - Return 
true if STy includes any @@ -344,7 +359,7 @@ bool SRETPromotion::nestedStructType(const StructType *STy) {    unsigned Num = STy->getNumElements();    for (unsigned i = 0; i < Num; i++) {      const Type *Ty = STy->getElementType(i); -    if (!Ty->isSingleValueType() && Ty != Type::VoidTy) +    if (!Ty->isSingleValueType() && Ty != Type::getVoidTy(STy->getContext()))        return true;    }    return false; diff --git a/lib/Transforms/Instrumentation/BlockProfiling.cpp b/lib/Transforms/Instrumentation/BlockProfiling.cpp index 2bd9809a3961..eb8f22585b62 100644 --- a/lib/Transforms/Instrumentation/BlockProfiling.cpp +++ b/lib/Transforms/Instrumentation/BlockProfiling.cpp @@ -19,12 +19,11 @@  //  //===----------------------------------------------------------------------===// -#include "llvm/Constants.h"  #include "llvm/DerivedTypes.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Instrumentation.h"  #include "RSProfiling.h"  #include "ProfilingUtils.h" @@ -52,8 +51,8 @@ ModulePass *llvm::createFunctionProfilerPass() {  bool FunctionProfiler::runOnModule(Module &M) {    Function *Main = M.getFunction("main");    if (Main == 0) { -    cerr << "WARNING: cannot insert function profiling into a module" -         << " with no main function!\n"; +    errs() << "WARNING: cannot insert function profiling into a module" +           << " with no main function!\n";      return false;  // No main, no instrumentation!    
} @@ -62,10 +61,11 @@ bool FunctionProfiler::runOnModule(Module &M) {      if (!I->isDeclaration())        ++NumFunctions; -  const Type *ATy = ArrayType::get(Type::Int32Ty, NumFunctions); +  const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), +                                   NumFunctions);    GlobalVariable *Counters = -    new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, -                       Constant::getNullValue(ATy), "FuncProfCounters", &M); +    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, +                       Constant::getNullValue(ATy), "FuncProfCounters");    // Instrument all of the functions...    unsigned i = 0; @@ -98,26 +98,29 @@ ModulePass *llvm::createBlockProfilerPass() { return new BlockProfiler(); }  bool BlockProfiler::runOnModule(Module &M) {    Function *Main = M.getFunction("main");    if (Main == 0) { -    cerr << "WARNING: cannot insert block profiling into a module" -         << " with no main function!\n"; +    errs() << "WARNING: cannot insert block profiling into a module" +           << " with no main function!\n";      return false;  // No main, no instrumentation!    }    unsigned NumBlocks = 0;    for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) -    NumBlocks += I->size(); +    if (!I->isDeclaration()) +      NumBlocks += I->size(); -  const Type *ATy = ArrayType::get(Type::Int32Ty, NumBlocks); +  const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumBlocks);    GlobalVariable *Counters = -    new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, -                       Constant::getNullValue(ATy), "BlockProfCounters", &M); +    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, +                       Constant::getNullValue(ATy), "BlockProfCounters");    // Instrument all of the blocks...    
unsigned i = 0; -  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) +  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { +    if (I->isDeclaration()) continue;      for (Function::iterator BB = I->begin(), E = I->end(); BB != E; ++BB)        // Insert counter at the start of the block        IncrementCounterInBlock(BB, i++, Counters); +  }    // Add the initialization call to main.    InsertProfilingInitCall(Main, "llvm_start_block_profiling", Counters); diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index d7c518d282f8..494928e43814 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,6 +1,7 @@  add_llvm_library(LLVMInstrumentation    BlockProfiling.cpp    EdgeProfiling.cpp +  OptimalEdgeProfiling.cpp    ProfilingUtils.cpp    RSProfiling.cpp    ) diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp index 0831f3b7a480..b9cb275578e0 100644 --- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp @@ -16,25 +16,30 @@  // number of counters inserted.  
//  //===----------------------------------------------------------------------===// - +#define DEBUG_TYPE "insert-edge-profiling"  #include "ProfilingUtils.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Instrumentation.h" +#include "llvm/ADT/Statistic.h"  #include <set>  using namespace llvm; +STATISTIC(NumEdgesInserted, "The # of edges inserted."); +  namespace {    class VISIBILITY_HIDDEN EdgeProfiler : public ModulePass {      bool runOnModule(Module &M);    public:      static char ID; // Pass identification, replacement for typeid      EdgeProfiler() : ModulePass(&ID) {} + +    virtual const char *getPassName() const { +      return "Edge Profiler"; +    }    };  } @@ -47,14 +52,17 @@ ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }  bool EdgeProfiler::runOnModule(Module &M) {    Function *Main = M.getFunction("main");    if (Main == 0) { -    cerr << "WARNING: cannot insert edge profiling into a module" -         << " with no main function!\n"; +    errs() << "WARNING: cannot insert edge profiling into a module" +           << " with no main function!\n";      return false;  // No main, no instrumentation!    }    std::set<BasicBlock*> BlocksToInstrument;    unsigned NumEdges = 0; -  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) +  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { +    if (F->isDeclaration()) continue; +    // Reserve space for (0,entry) edge. +    ++NumEdges;      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {        // Keep track of which blocks need to be instrumented.  
We don't want to        // instrument blocks that are added as the result of breaking critical @@ -62,15 +70,20 @@ bool EdgeProfiler::runOnModule(Module &M) {        BlocksToInstrument.insert(BB);        NumEdges += BB->getTerminator()->getNumSuccessors();      } +  } -  const Type *ATy = ArrayType::get(Type::Int32Ty, NumEdges); +  const Type *ATy = ArrayType::get(Type::getInt32Ty(M.getContext()), NumEdges);    GlobalVariable *Counters = -    new GlobalVariable(ATy, false, GlobalValue::InternalLinkage, -                       Constant::getNullValue(ATy), "EdgeProfCounters", &M); +    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, +                       Constant::getNullValue(ATy), "EdgeProfCounters"); +  NumEdgesInserted = NumEdges;    // Instrument all of the edges...    unsigned i = 0; -  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) +  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { +    if (F->isDeclaration()) continue; +    // Create counter for (0,entry) edge. +    IncrementCounterInBlock(&F->getEntryBlock(), i++, Counters);      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)        if (BlocksToInstrument.count(BB)) {  // Don't instrument inserted blocks          // Okay, we have to add a counter of each outgoing edge.  If the @@ -93,6 +106,7 @@ bool EdgeProfiler::runOnModule(Module &M) {            }          }        } +  }    // Add the initialization call to main.    
InsertProfilingInitCall(Main, "llvm_start_edge_profiling", Counters); diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h new file mode 100644 index 000000000000..2951dbcea9a1 --- /dev/null +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -0,0 +1,95 @@ +//===- llvm/Analysis/MaximumSpanningTree.h - Interface ----------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This module provides means for calculating a maximum spanning tree for a +// given set of weighted edges. The type parameter T is the type of a node. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H +#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H + +#include "llvm/ADT/EquivalenceClasses.h" +#include <vector> +#include <algorithm> + +namespace llvm { + +  /// MaximumSpanningTree - A MST implementation. +  /// The type parameter T determines the type of the nodes of the graph. +  template <typename T> +  class MaximumSpanningTree { + +    // A comparing class for comparing weighted edges. 
+    template <typename CT> +    struct EdgeWeightCompare { +      bool operator()(typename MaximumSpanningTree<CT>::EdgeWeight X,  +                      typename MaximumSpanningTree<CT>::EdgeWeight Y) const { +        if (X.second > Y.second) return true; +        if (X.second < Y.second) return false; +        return false; +      } +    }; + +  public: +    typedef std::pair<const T*, const T*> Edge; +    typedef std::pair<Edge, double> EdgeWeight; +    typedef std::vector<EdgeWeight> EdgeWeights; +  protected: +    typedef std::vector<Edge> MaxSpanTree; + +    MaxSpanTree MST; + +  public: +    static char ID; // Class identification, replacement for typeinfo + +    /// MaximumSpanningTree() - Takes a vector of weighted edges and returns a +    /// spanning tree. +    MaximumSpanningTree(EdgeWeights &EdgeVector) { + +      std::stable_sort(EdgeVector.begin(), EdgeVector.end(), EdgeWeightCompare<T>()); + +      // Create spanning tree, Forest contains a special data structure +      // that makes checking if two nodes are already in a common (sub-)tree +      // fast and cheap. +      EquivalenceClasses<const T*> Forest; +      for (typename EdgeWeights::iterator EWi = EdgeVector.begin(), +           EWe = EdgeVector.end(); EWi != EWe; ++EWi) { +        Edge e = (*EWi).first; + +        Forest.insert(e.first); +        Forest.insert(e.second); +      } + +      // Iterate over the sorted edges, biggest first. +      for (typename EdgeWeights::iterator EWi = EdgeVector.begin(), +           EWe = EdgeVector.end(); EWi != EWe; ++EWi) { +        Edge e = (*EWi).first; + +        if (Forest.findLeader(e.first) != Forest.findLeader(e.second)) { +          Forest.unionSets(e.first, e.second); +          // So we know now that the edge is not already in a subtree, so we push +          // the edge to the MST. 
+          MST.push_back(e); +        } +      } +    } + +    typename MaxSpanTree::iterator begin() { +      return MST.begin(); +    } + +    typename MaxSpanTree::iterator end() { +      return MST.end(); +    } +  }; + +} // End llvm namespace + +#endif diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp new file mode 100644 index 000000000000..b2e6747ca0e9 --- /dev/null +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -0,0 +1,219 @@ +//===- OptimalEdgeProfiling.cpp - Insert counters for opt. edge profiling -===// +// +//                      The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass instruments the specified program with counters for edge profiling. +// Edge profiling can give a reasonable approximation of the hot paths through a +// program, and is used for a wide variety of program transformations. 
+// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "insert-optimal-edge-profiling" +#include "ProfilingUtils.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Analysis/ProfileInfoLoader.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" +#include "MaximumSpanningTree.h" +#include <set> +using namespace llvm; + +STATISTIC(NumEdgesInserted, "The # of edges inserted."); + +namespace { +  class VISIBILITY_HIDDEN OptimalEdgeProfiler : public ModulePass { +    bool runOnModule(Module &M); +  public: +    static char ID; // Pass identification, replacement for typeid +    OptimalEdgeProfiler() : ModulePass(&ID) {} + +    void getAnalysisUsage(AnalysisUsage &AU) const { +      AU.addRequiredID(ProfileEstimatorPassID); +      AU.addRequired<ProfileInfo>(); +    } + +    virtual const char *getPassName() const { +      return "Optimal Edge Profiler"; +    } +  }; +} + +char OptimalEdgeProfiler::ID = 0; +static RegisterPass<OptimalEdgeProfiler> +X("insert-optimal-edge-profiling",  +  "Insert optimal instrumentation for edge profiling"); + +ModulePass *llvm::createOptimalEdgeProfilerPass() { +  return new OptimalEdgeProfiler(); +} + +inline static void printEdgeCounter(ProfileInfo::Edge e, +                                    BasicBlock* b, +                                    unsigned i) { +  DEBUG(errs() << "--Edge Counter for " << (e) << " in " \ +               << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n"); +} + +bool OptimalEdgeProfiler::runOnModule(Module &M) { +  Function *Main = M.getFunction("main"); +  if (Main == 0) { +    errs() << "WARNING: cannot insert edge profiling into a module" +   
        << " with no main function!\n"; +    return false;  // No main, no instrumentation! +  } + +  // NumEdges counts all the edges that may be instrumented. Later on it is +  // decided which edges to actually instrument, to achieve optimal profiling. +  // For the entry block a virtual edge (0,entry) is reserved, for each block +  // with no successors an edge (BB,0) is reserved. These edges are necessary +  // to calculate a truly optimal maximum spanning tree and thus an optimal +  // instrumentation. +  unsigned NumEdges = 0; + +  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { +    if (F->isDeclaration()) continue; +    // Reserve space for (0,entry) edge. +    ++NumEdges; +    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { +      // Keep track of which blocks need to be instrumented.  We don't want to +      // instrument blocks that are added as the result of breaking critical +      // edges! +      if (BB->getTerminator()->getNumSuccessors() == 0) { +        // Reserve space for (BB,0) edge. +        ++NumEdges; +      } else { +        NumEdges += BB->getTerminator()->getNumSuccessors(); +      } +    } +  } + +  // In the profiling output a counter for each edge is reserved, but only a few +  // are used. This is done to be able to read back in the profile without +  // calculating the maximum spanning tree again, instead each edge counter that +  // is not used is initialised with -1 to signal that this edge counter has to +  // be calculated from other edge counters on reading the profile info back +  // in. 
 + +  const Type *Int32 = Type::getInt32Ty(M.getContext()); +  const ArrayType *ATy = ArrayType::get(Int32, NumEdges); +  GlobalVariable *Counters = +    new GlobalVariable(M, ATy, false, GlobalValue::InternalLinkage, +                       Constant::getNullValue(ATy), "OptEdgeProfCounters"); +  NumEdgesInserted = 0; + +  std::vector<Constant*> Initializer(NumEdges); +  Constant* Zero = ConstantInt::get(Int32, 0); +  Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); + +  // Instrument all of the edges not in MST... +  unsigned i = 0; +  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { +    if (F->isDeclaration()) continue; +    DEBUG(errs()<<"Working on "<<F->getNameStr()<<"\n"); + +    // Calculate a Maximum Spanning Tree with the edge weights determined by +    // ProfileEstimator. ProfileEstimator also assigns weights to the virtual +    // edges (0,entry) and (BB,0) (for blocks with no successors) and these +    // edges also participate in the maximum spanning tree calculation.  +    // The third parameter of MaximumSpanningTree() has the effect that not the +    // actual MST is returned but the edges _not_ in the MST. + +    ProfileInfo::EdgeWeights ECs =  +      getAnalysisID<ProfileInfo>(ProfileEstimatorPassID, *F).getEdgeWeights(F); +    std::vector<ProfileInfo::EdgeWeight> EdgeVector(ECs.begin(), ECs.end()); +    MaximumSpanningTree<BasicBlock> MST (EdgeVector); +    std::stable_sort(MST.begin(),MST.end()); + +    // Check if (0,entry) not in the MST. If not, instrument edge +    // (IncrementCounterInBlock()) and set the counter initially to zero, if +    // the edge is in the MST the counter is initialised to -1. 
 + +    BasicBlock *entry = &(F->getEntryBlock()); +    ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry); +    if (!std::binary_search(MST.begin(), MST.end(), edge)) { +      printEdgeCounter(edge,entry,i); +      IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++; +      Initializer[i++] = (Zero); +    } else{ +      Initializer[i++] = (Uncounted); +    } + +    // InsertedBlocks contains all blocks that were inserted for splitting an +    // edge, these blocks do not have to be instrumented. +    DenseSet<BasicBlock*> InsertedBlocks; +    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { +      // Check if block was not inserted and thus does not have to be +      // instrumented. +      if (InsertedBlocks.count(BB)) continue; + +      // Okay, we have to add a counter of each outgoing edge not in MST. If +      // the outgoing edge is not critical don't split it, just insert the +      // counter in the source or destination of the edge. Also, if the block +      // has no successors, the virtual edge (BB,0) is processed. +      TerminatorInst *TI = BB->getTerminator(); +      if (TI->getNumSuccessors() == 0) { +        ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0); +        if (!std::binary_search(MST.begin(), MST.end(), edge)) { +          printEdgeCounter(edge,BB,i); +          IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++; +          Initializer[i++] = (Zero); +        } else{ +          Initializer[i++] = (Uncounted); +        } +      } +      for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { +        BasicBlock *Succ = TI->getSuccessor(s); +        ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,Succ); +        if (!std::binary_search(MST.begin(), MST.end(), edge)) { + +          // If the edge is critical, split it. 
 +          bool wasInserted = SplitCriticalEdge(TI, s, this); +          Succ = TI->getSuccessor(s); +          if (wasInserted) +            InsertedBlocks.insert(Succ); + +          // Okay, we are guaranteed that the edge is no longer critical.  If +          // we only have a single successor, insert the counter in this block, +          // otherwise insert it in the successor block. +          if (TI->getNumSuccessors() == 1) { +            // Insert counter at the start of the block +            printEdgeCounter(edge,BB,i); +            IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++; +          } else { +            // Insert counter at the start of the block +            printEdgeCounter(edge,Succ,i); +            IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++; +          } +          Initializer[i++] = (Zero); +        } else { +          Initializer[i++] = (Uncounted); +        } +      } +    } +  } + +  // Check if the number of edges counted at first was the number of edges we +  // considered for instrumentation. +  assert(i==NumEdges && "the number of edges in counting array is wrong"); + +  // Assign the now completely defined initialiser to the array. +  Constant *init = ConstantArray::get(ATy, Initializer); +  Counters->setInitializer(init); + +  // Add the initialization call to main. 
+  InsertProfilingInitCall(Main, "llvm_start_opt_edge_profiling", Counters); +  return true; +} + diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index 48071f115692..1679bea08c19 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -18,22 +18,27 @@  #include "llvm/Constants.h"  #include "llvm/DerivedTypes.h"  #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,                                     GlobalValue *Array) { +  LLVMContext &Context = MainFn->getContext();    const Type *ArgVTy =  -    PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)); -  const PointerType *UIntPtr = PointerType::getUnqual(Type::Int32Ty); +    PointerType::getUnqual(Type::getInt8PtrTy(Context)); +  const PointerType *UIntPtr = +        Type::getInt32PtrTy(Context);    Module &M = *MainFn->getParent(); -  Constant *InitFn = M.getOrInsertFunction(FnName, Type::Int32Ty, Type::Int32Ty, -                                           ArgVTy, UIntPtr, Type::Int32Ty, +  Constant *InitFn = M.getOrInsertFunction(FnName, Type::getInt32Ty(Context), +                                           Type::getInt32Ty(Context), +                                           ArgVTy, UIntPtr, +                                           Type::getInt32Ty(Context),                                             (Type *)0);    // This could force argc and argv into programs that wouldn't otherwise have    // them, but instead we just pass null values in.    std::vector<Value*> Args(4); -  Args[0] = Constant::getNullValue(Type::Int32Ty); +  Args[0] = Constant::getNullValue(Type::getInt32Ty(Context));    Args[1] = Constant::getNullValue(ArgVTy);    // Skip over any allocas in the entry block. 
@@ -41,7 +46,8 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,    BasicBlock::iterator InsertPos = Entry->begin();    while (isa<AllocaInst>(InsertPos)) ++InsertPos; -  std::vector<Constant*> GEPIndices(2, Constant::getNullValue(Type::Int32Ty)); +  std::vector<Constant*> GEPIndices(2, +                             Constant::getNullValue(Type::getInt32Ty(Context)));    unsigned NumElements = 0;    if (Array) {      Args[2] = ConstantExpr::getGetElementPtr(Array, &GEPIndices[0], @@ -53,7 +59,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,      // pass null.      Args[2] = ConstantPointerNull::get(UIntPtr);    } -  Args[3] = ConstantInt::get(Type::Int32Ty, NumElements); +  Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);    Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),                                             "newargc", InsertPos); @@ -78,16 +84,18 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,      AI = MainFn->arg_begin();      // If the program looked at argc, have it look at the return value of the      // init call instead. 
-    if (AI->getType() != Type::Int32Ty) { +    if (AI->getType() != Type::getInt32Ty(Context)) {        Instruction::CastOps opcode;        if (!AI->use_empty()) {          opcode = CastInst::getCastOpcode(InitCall, true, AI->getType(), true);          AI->replaceAllUsesWith(            CastInst::Create(opcode, InitCall, AI->getType(), "", InsertPos));        } -      opcode = CastInst::getCastOpcode(AI, true, Type::Int32Ty, true); +      opcode = CastInst::getCastOpcode(AI, true, +                                       Type::getInt32Ty(Context), true);        InitCall->setOperand(1,  -          CastInst::Create(opcode, AI, Type::Int32Ty, "argc.cast", InitCall)); +          CastInst::Create(opcode, AI, Type::getInt32Ty(Context), +                           "argc.cast", InitCall));      } else {        AI->replaceAllUsesWith(InitCall);        InitCall->setOperand(1, AI); @@ -104,17 +112,20 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,    while (isa<AllocaInst>(InsertPos))      ++InsertPos; +  LLVMContext &Context = BB->getContext(); +    // Create the getelementptr constant expression    std::vector<Constant*> Indices(2); -  Indices[0] = Constant::getNullValue(Type::Int32Ty); -  Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum); +  Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); +  Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum);    Constant *ElementPtr =  -    ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size()); +    ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], +                                          Indices.size());    // Load, increment and store the value back.    
Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos);    Value *NewVal = BinaryOperator::Create(Instruction::Add, OldVal, -                                         ConstantInt::get(Type::Int32Ty, 1), +                                 ConstantInt::get(Type::getInt32Ty(Context), 1),                                           "NewFuncCounter", InsertPos);    new StoreInst(NewVal, ElementPtr, InsertPos);  } diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp index b110f4eb368b..3b72260db845 100644 --- a/lib/Transforms/Instrumentation/RSProfiling.cpp +++ b/lib/Transforms/Instrumentation/RSProfiling.cpp @@ -33,6 +33,7 @@  //===----------------------------------------------------------------------===//  #include "llvm/Pass.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Instructions.h"  #include "llvm/Constants.h" @@ -43,6 +44,8 @@  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Instrumentation.h"  #include "RSProfiling.h"  #include <set> @@ -197,8 +200,8 @@ GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,                                           uint64_t resetval) : T(t) {    ConstantInt* Init = ConstantInt::get(T, resetval);     ResetValue = Init; -  Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage, -                               Init, "RandomSteeringCounter", &M); +  Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage, +                               Init, "RandomSteeringCounter");  }  GlobalRandomCounter::~GlobalRandomCounter() {} @@ -211,8 +214,9 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {    //decrement counter    LoadInst* l = new LoadInst(Counter, "counter", t); -  ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, 
ConstantInt::get(T, 0),  -                             "countercc", t); +  ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, +                             ConstantInt::get(T, 0),  +                             "countercc");    Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),                                          "counternew", t); @@ -221,7 +225,8 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {    //reset counter    BasicBlock* oldnext = t->getSuccessor(0); -  BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(),  +  BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), +                                              "reset", oldnext->getParent(),                                                 oldnext);    TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);    t->setSuccessor(0, resetblock); @@ -234,8 +239,8 @@ GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,    : AI(0), T(t) {    ConstantInt* Init = ConstantInt::get(T, resetval);    ResetValue  = Init; -  Counter = new GlobalVariable(T, false, GlobalValue::InternalLinkage, -                               Init, "RandomSteeringCounter", &M); +  Counter = new GlobalVariable(M, T, false, GlobalValue::InternalLinkage, +                               Init, "RandomSteeringCounter");  }  GlobalRandomCounterOpt::~GlobalRandomCounterOpt() {} @@ -283,8 +288,9 @@ void GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {    //decrement counter    LoadInst* l = new LoadInst(AI, "counter", t); -  ICmpInst* s = new ICmpInst(ICmpInst::ICMP_EQ, l, ConstantInt::get(T, 0),  -                             "countercc", t); +  ICmpInst* s = new ICmpInst(t, ICmpInst::ICMP_EQ, l, +                             ConstantInt::get(T, 0),  +                             "countercc");    Value* nv = BinaryOperator::CreateSub(l, ConstantInt::get(T, 1),                                          "counternew", t); @@ -293,7 +299,8 @@ void 
GlobalRandomCounterOpt::ProcessChoicePoint(BasicBlock* bb) {    //reset counter    BasicBlock* oldnext = t->getSuccessor(0); -  BasicBlock* resetblock = BasicBlock::Create("reset", oldnext->getParent(),  +  BasicBlock* resetblock = BasicBlock::Create(bb->getContext(), +                                              "reset", oldnext->getParent(),                                                 oldnext);    TerminatorInst* t2 = BranchInst::Create(oldnext, resetblock);    t->setSuccessor(0, resetblock); @@ -315,12 +322,13 @@ void CycleCounter::ProcessChoicePoint(BasicBlock* bb) {    CallInst* c = CallInst::Create(F, "rdcc", t);    BinaryOperator* b =  -    BinaryOperator::CreateAnd(c, ConstantInt::get(Type::Int64Ty, rm), +    BinaryOperator::CreateAnd(c, +                      ConstantInt::get(Type::getInt64Ty(bb->getContext()), rm),                                "mrdcc", t); -  ICmpInst *s = new ICmpInst(ICmpInst::ICMP_EQ, b, -                             ConstantInt::get(Type::Int64Ty, 0),  -                             "mrdccc", t); +  ICmpInst *s = new ICmpInst(t, ICmpInst::ICMP_EQ, b, +                        ConstantInt::get(Type::getInt64Ty(bb->getContext()), 0),  +                             "mrdccc");    t->setCondition(s);  } @@ -345,16 +353,16 @@ void RSProfilers_std::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNu    // Create the getelementptr constant expression    std::vector<Constant*> Indices(2); -  Indices[0] = Constant::getNullValue(Type::Int32Ty); -  Indices[1] = ConstantInt::get(Type::Int32Ty, CounterNum); -  Constant *ElementPtr = ConstantExpr::getGetElementPtr(CounterArray, +  Indices[0] = Constant::getNullValue(Type::getInt32Ty(BB->getContext())); +  Indices[1] = ConstantInt::get(Type::getInt32Ty(BB->getContext()), CounterNum); +  Constant *ElementPtr =ConstantExpr::getGetElementPtr(CounterArray,                                                          &Indices[0], 2);    // Load, increment and store the value back.    
Value *OldVal = new LoadInst(ElementPtr, "OldCounter", InsertPos);    profcode.insert(OldVal);    Value *NewVal = BinaryOperator::CreateAdd(OldVal, -                                            ConstantInt::get(Type::Int32Ty, 1), +                       ConstantInt::get(Type::getInt32Ty(BB->getContext()), 1),                                              "NewCounter", InsertPos);    profcode.insert(NewVal);    profcode.insert(new StoreInst(NewVal, ElementPtr, InsertPos)); @@ -377,7 +385,8 @@ Value* ProfilerRS::Translate(Value* v) {      if (bb == &bb->getParent()->getEntryBlock())        TransCache[bb] = bb; //don't translate entry block      else -      TransCache[bb] = BasicBlock::Create("dup_" + bb->getName(), +      TransCache[bb] = BasicBlock::Create(v->getContext(),  +                                          "dup_" + bb->getName(),                                            bb->getParent(), NULL);      return TransCache[bb];    } else if (Instruction* i = dyn_cast<Instruction>(v)) { @@ -401,7 +410,7 @@ Value* ProfilerRS::Translate(Value* v) {      TransCache[v] = v;      return v;    } -  assert(0 && "Value not handled"); +  llvm_unreachable("Value not handled");    return 0;  } @@ -466,16 +475,16 @@ void ProfilerRS::ProcessBackEdge(BasicBlock* src, BasicBlock* dst, Function& F)    //a:    Function::iterator BBN = src; ++BBN; -  BasicBlock* bbC = BasicBlock::Create("choice", &F, BBN); +  BasicBlock* bbC = BasicBlock::Create(F.getContext(), "choice", &F, BBN);    //ChoicePoints.insert(bbC);    BBN = cast<BasicBlock>(Translate(src)); -  BasicBlock* bbCp = BasicBlock::Create("choice", &F, ++BBN); +  BasicBlock* bbCp = BasicBlock::Create(F.getContext(), "choice", &F, ++BBN);    ChoicePoints.insert(bbCp);    //b:    BranchInst::Create(cast<BasicBlock>(Translate(dst)), bbC);    BranchInst::Create(dst, cast<BasicBlock>(Translate(dst)),  -                     ConstantInt::get(Type::Int1Ty, true), bbCp); +              
ConstantInt::get(Type::getInt1Ty(src->getContext()), true), bbCp);    //c:    {      TerminatorInst* iB = src->getTerminator(); @@ -531,9 +540,8 @@ bool ProfilerRS::runOnFunction(Function& F) {      TerminatorInst* T = F.getEntryBlock().getTerminator();      ReplaceInstWithInst(T, BranchInst::Create(T->getSuccessor(0),                                                cast<BasicBlock>( -                                                Translate(T->getSuccessor(0))), -                                              ConstantInt::get(Type::Int1Ty, -                                                               true))); +                   Translate(T->getSuccessor(0))), +                      ConstantInt::get(Type::getInt1Ty(F.getContext()), true)));      //do whatever is needed now that the function is duplicated      c->PrepFunction(&F); @@ -556,10 +564,12 @@ bool ProfilerRS::runOnFunction(Function& F) {  bool ProfilerRS::doInitialization(Module &M) {    switch (RandomMethod) {    case GBV: -    c = new GlobalRandomCounter(M, Type::Int32Ty, (1 << 14) - 1); +    c = new GlobalRandomCounter(M, Type::getInt32Ty(M.getContext()), +                                (1 << 14) - 1);      break;    case GBVO: -    c = new GlobalRandomCounterOpt(M, Type::Int32Ty, (1 << 14) - 1); +    c = new GlobalRandomCounterOpt(M, Type::getInt32Ty(M.getContext()), +                                   (1 << 14) - 1);      break;    case HOSTCC:      c = new CycleCounter(M, (1 << 14) - 1); @@ -639,7 +649,7 @@ static void getBackEdges(Function& F, T& BackEdges) {    std::map<BasicBlock*, int> finish;    int time = 0;    recBackEdge(&F.getEntryBlock(), BackEdges, color, depth, finish, time); -  DOUT << F.getName() << " " << BackEdges.size() << "\n"; +  DEBUG(errs() << F.getName() << " " << BackEdges.size() << "\n");  } diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index 5fe1eeb5c752..025d02ad3073 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -13,7 +13,7 @@ 
PARALLEL_DIRS = Utils Instrumentation Scalar IPO Hello  include $(LEVEL)/Makefile.config  # No support for plugins on windows targets -ifeq ($(OS), $(filter $(OS), Cygwin MingW)) +ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))    PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS))  endif diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index 9c55f664ebbd..37f383fb512a 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -21,19 +21,17 @@  #include "llvm/IntrinsicInst.h"  #include "llvm/Pass.h"  #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/InstIterator.h"  #include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h" -  using namespace llvm;  STATISTIC(NumRemoved, "Number of instructions removed");  namespace { -  struct VISIBILITY_HIDDEN ADCE : public FunctionPass { +  struct ADCE : public FunctionPass {      static char ID; // Pass identification, replacement for typeid      ADCE() : FunctionPass(&ID) {} diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp index fb9b88005b6a..54533f50405f 100644 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -31,7 +31,6 @@  #include "llvm/Function.h"  #include "llvm/Pass.h"  #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Transforms/Scalar.h"  #include <set> @@ -40,7 +39,7 @@ using namespace llvm;  STATISTIC(NumMoved, "Number of basic blocks moved");  namespace { -  struct VISIBILITY_HIDDEN BlockPlacement : public FunctionPass { +  struct BlockPlacement : public FunctionPass {      static char ID; // Pass identification, replacement for typeid      BlockPlacement() : FunctionPass(&ID) {} @@ -127,13 +126,13 @@ void BlockPlacement::PlaceBlocks(BasicBlock *BB) {    
    /*empty*/;      if (SI == E) return;  // No more successors to place. -    unsigned MaxExecutionCount = PI->getExecutionCount(*SI); +    double MaxExecutionCount = PI->getExecutionCount(*SI);      BasicBlock *MaxSuccessor = *SI;      // Scan for more frequently executed successors      for (; SI != E; ++SI)        if (!PlacedBlocks.count(*SI)) { -        unsigned Count = PI->getExecutionCount(*SI); +        double Count = PI->getExecutionCount(*SI);          if (Count > MaxExecutionCount ||              // Prefer to not disturb the code.              (Count == MaxExecutionCount && *SI == &*InsertPos)) { diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 8a8f83fa311d..cbeed4c6b55f 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -1,13 +1,13 @@  add_llvm_library(LLVMScalarOpts    ADCE.cpp    BasicBlockPlacement.cpp +  CodeGenLICM.cpp    CodeGenPrepare.cpp    CondPropagate.cpp    ConstantProp.cpp    DCE.cpp    DeadStoreElimination.cpp    GVN.cpp -  GVNPRE.cpp    IndVarSimplify.cpp    InstructionCombining.cpp    JumpThreading.cpp @@ -19,7 +19,6 @@ add_llvm_library(LLVMScalarOpts    LoopUnroll.cpp    LoopUnswitch.cpp    MemCpyOptimizer.cpp -  PredicateSimplifier.cpp    Reassociate.cpp    Reg2Mem.cpp    SCCP.cpp diff --git a/lib/Transforms/Scalar/CodeGenLICM.cpp b/lib/Transforms/Scalar/CodeGenLICM.cpp new file mode 100644 index 000000000000..10f950e135da --- /dev/null +++ b/lib/Transforms/Scalar/CodeGenLICM.cpp @@ -0,0 +1,112 @@ +//===- CodeGenLICM.cpp - LICM a function for code generation --------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This function performs late LICM, hoisting constants out of loops that +// are not valid immediates. 
It should not be followed by instcombine, +// because instcombine would quickly stuff the constants back into the loop. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegen-licm" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/DenseMap.h" +using namespace llvm; + +namespace { +  class CodeGenLICM : public LoopPass { +    virtual bool runOnLoop(Loop *L, LPPassManager &LPM); +    virtual void getAnalysisUsage(AnalysisUsage &AU) const; +  public: +    static char ID; // Pass identification, replacement for typeid +    explicit CodeGenLICM() : LoopPass(&ID) {} +  }; +} + +char CodeGenLICM::ID = 0; +static RegisterPass<CodeGenLICM> X("codegen-licm", +                                   "hoist constants out of loops"); + +Pass *llvm::createCodeGenLICMPass() { +  return new CodeGenLICM(); +} + +bool CodeGenLICM::runOnLoop(Loop *L, LPPassManager &) { +  bool Changed = false; + +  // Only visit outermost loops. +  if (L->getParentLoop()) return Changed; + +  Instruction *PreheaderTerm = L->getLoopPreheader()->getTerminator(); +  DenseMap<Constant *, BitCastInst *> HoistedConstants; + +  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); +       I != E; ++I) { +    BasicBlock *BB = *I; +    for (BasicBlock::iterator BBI = BB->begin(), BBE = BB->end(); +         BBI != BBE; ++BBI) { +      Instruction *I = BBI; +      // TODO: For now, skip all intrinsic instructions, because some of them +      // can require their operands to be constants, and we don't want to +      // break that. +      if (isa<IntrinsicInst>(I)) +        continue; +      // LLVM represents fneg as -0.0-x; don't hoist the -0.0 out. 
+      if (BinaryOperator::isFNeg(I) || +          BinaryOperator::isNeg(I) || +          BinaryOperator::isNot(I)) +        continue; +      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { +        // Don't hoist out switch case constants. +        if (isa<SwitchInst>(I) && i == 1) +          break; +        // Don't hoist out shuffle masks. +        if (isa<ShuffleVectorInst>(I) && i == 2) +          break; +        Value *Op = I->getOperand(i); +        Constant *C = dyn_cast<Constant>(Op); +        if (!C) continue; +        // TODO: Ask the target which constants are legal. This would allow +        // us to add support for hoisting ConstantInts and GlobalValues too. +        if (isa<ConstantFP>(C) || +            isa<ConstantVector>(C) || +            isa<ConstantAggregateZero>(C)) { +          BitCastInst *&BC = HoistedConstants[C]; +          if (!BC) +            BC = new BitCastInst(C, C->getType(), "hoist", PreheaderTerm); +          I->setOperand(i, BC); +          Changed = true; +        } +      } +    } +  } + +  return Changed; +} + +void CodeGenLICM::getAnalysisUsage(AnalysisUsage &AU) const { +  // This pass preserves just about everything. List some popular things here. +  AU.setPreservesCFG(); +  AU.addPreservedID(LoopSimplifyID); +  AU.addPreserved<LoopInfo>(); +  AU.addPreserved<AliasAnalysis>(); +  AU.addPreserved("scalar-evolution"); +  AU.addPreserved("iv-users"); +  AU.addPreserved("lda"); +  AU.addPreserved("live-values"); + +  // Hoisting requires a loop preheader. 
+  AU.addRequiredID(LoopSimplifyID); +} diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 85e9243e3ce8..a3e3fea4da07 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -23,10 +23,9 @@  #include "llvm/IntrinsicInst.h"  #include "llvm/LLVMContext.h"  #include "llvm/Pass.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Analysis/ProfileInfo.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h"  #include "llvm/Transforms/Utils/AddrModeMatcher.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Local.h" @@ -35,10 +34,10 @@  #include "llvm/Assembly/Writer.h"  #include "llvm/Support/CallSite.h"  #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/GetElementPtrTypeIterator.h"  #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/raw_ostream.h"  using namespace llvm;  using namespace llvm::PatternMatch; @@ -46,10 +45,11 @@ static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",                                         cl::init(false), cl::Hidden);  namespace { -  class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass { +  class CodeGenPrepare : public FunctionPass {      /// TLI - Keep a pointer of a TargetLowering to consult for determining      /// transformation profitability.      const TargetLowering *TLI; +    ProfileInfo *PI;      /// BackEdges - Keep a set of all the loop back edges.      
/// @@ -60,6 +60,10 @@ namespace {        : FunctionPass(&ID), TLI(tli) {}      bool runOnFunction(Function &F); +    virtual void getAnalysisUsage(AnalysisUsage &AU) const { +      AU.addPreserved<ProfileInfo>(); +    } +    private:      bool EliminateMostlyEmptyBlocks(Function &F);      bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; @@ -95,6 +99,7 @@ void CodeGenPrepare::findLoopBackEdges(const Function &F) {  bool CodeGenPrepare::runOnFunction(Function &F) {    bool EverMadeChange = false; +  PI = getAnalysisIfAvailable<ProfileInfo>();    // First pass, eliminate blocks that contain only PHI nodes and an    // unconditional branch.    EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -232,7 +237,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {    BranchInst *BI = cast<BranchInst>(BB->getTerminator());    BasicBlock *DestBB = BI->getSuccessor(0); -  DOUT << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB; +  DEBUG(errs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);    // If the destination block has a single pred, then this is a trivial edge,    // just collapse it. @@ -241,12 +246,12 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {        // Remember if SinglePred was the entry block of the function.  If so, we        // will need to move BB back to the entry position.        
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); -      MergeBasicBlockIntoOnlyPred(DestBB); +      MergeBasicBlockIntoOnlyPred(DestBB, this);        if (isEntry && BB != &BB->getParent()->getEntryBlock())          BB->moveBefore(&BB->getParent()->getEntryBlock()); -      DOUT << "AFTER:\n" << *DestBB << "\n\n\n"; +      DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");        return;      }    } @@ -283,9 +288,13 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {    // The PHIs are now updated, change everything that refers to BB to use    // DestBB and remove BB.    BB->replaceAllUsesWith(DestBB); +  if (PI) { +    PI->replaceAllUses(BB, DestBB); +    PI->removeEdge(ProfileInfo::getEdge(BB, DestBB)); +  }    BB->eraseFromParent(); -  DOUT << "AFTER:\n" << *DestBB << "\n\n\n"; +  DEBUG(errs() << "AFTER:\n" << *DestBB << "\n\n\n");  } @@ -358,6 +367,9 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,        // If we found a workable predecessor, change TI to branch to Succ.        if (FoundMatch) { +        ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); +        if (PI) +          PI->splitEdge(TIBB, Dest, Pred);          Dest->removePredecessor(TIBB);          TI->setSuccessor(SuccNum, Pred);          return; @@ -410,8 +422,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,  ///  static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){    // If this is a noop copy, -  MVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); -  MVT DstVT = TLI.getValueType(CI->getType()); +  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); +  EVT DstVT = TLI.getValueType(CI->getType());    // This is an fp<->int conversion?    
if (SrcVT.isInteger() != DstVT.isInteger()) @@ -424,10 +436,10 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){    // If these values will be promoted, find out what they will be promoted    // to.  This helps us consider truncates on PPC as noop copies when they    // are. -  if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote) -    SrcVT = TLI.getTypeToTransformTo(SrcVT); -  if (TLI.getTypeAction(DstVT) == TargetLowering::Promote) -    DstVT = TLI.getTypeToTransformTo(DstVT); +  if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote) +    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); +  if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote) +    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);    // If, after promotion, these are the same types, this is a noop copy.    if (SrcVT != DstVT) @@ -520,7 +532,8 @@ static bool OptimizeCmpExpression(CmpInst *CI) {        BasicBlock::iterator InsertPt = UserBB->getFirstNonPHI();        InsertedCmp = -        CmpInst::Create(CI->getOpcode(), CI->getPredicate(), CI->getOperand(0), +        CmpInst::Create(CI->getOpcode(), +                        CI->getPredicate(),  CI->getOperand(0),                          CI->getOperand(1), "", InsertPt);        MadeChange = true;      } @@ -577,7 +590,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,    // If all the instructions matched are already in this BB, don't do anything.    if (!AnyNonLocal) { -    DEBUG(cerr << "CGP: Found      local addrmode: " << AddrMode << "\n"); +    DEBUG(errs() << "CGP: Found      local addrmode: " << AddrMode << "\n");      return false;    } @@ -592,14 +605,15 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,    // computation.    
Value *&SunkAddr = SunkAddrs[Addr];    if (SunkAddr) { -    DEBUG(cerr << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " -               << *MemoryInst); +    DEBUG(errs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " +                 << *MemoryInst);      if (SunkAddr->getType() != Addr->getType())        SunkAddr = new BitCastInst(SunkAddr, Addr->getType(), "tmp", InsertPt);    } else { -    DEBUG(cerr << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " -               << *MemoryInst); -    const Type *IntPtrTy = TLI->getTargetData()->getIntPtrType(); +    DEBUG(errs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " +                 << *MemoryInst); +    const Type *IntPtrTy = +          TLI->getTargetData()->getIntPtrType(AccessTy->getContext());      Value *Result = 0;      // Start with the scale value. @@ -616,7 +630,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,          V = new SExtInst(V, IntPtrTy, "sunkaddr", InsertPt);        }        if (AddrMode.Scale != 1) -        V = BinaryOperator::CreateMul(V, Context->getConstantInt(IntPtrTy, +        V = BinaryOperator::CreateMul(V, ConstantInt::get(IntPtrTy,                                                                  AddrMode.Scale),                                        "sunkaddr", InsertPt);        Result = V; @@ -648,7 +662,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,      // Add in the Base Offset if present.      
if (AddrMode.BaseOffs) { -      Value *V = Context->getConstantInt(IntPtrTy, AddrMode.BaseOffs); +      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);        if (Result)          Result = BinaryOperator::CreateAdd(Result, V, "sunkaddr", InsertPt);        else @@ -656,7 +670,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,      }      if (Result == 0) -      SunkAddr = Context->getNullValue(Addr->getType()); +      SunkAddr = Constant::getNullValue(Addr->getType());      else        SunkAddr = new IntToPtrInst(Result, Addr->getType(), "sunkaddr",InsertPt);    } @@ -858,18 +872,16 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {      } else if (CallInst *CI = dyn_cast<CallInst>(I)) {        // If we found an inline asm expession, and if the target knows how to        // lower it to normal LLVM code, do so now. -      if (TLI && isa<InlineAsm>(CI->getCalledValue())) -        if (const TargetAsmInfo *TAI = -            TLI->getTargetMachine().getTargetAsmInfo()) { -          if (TAI->ExpandInlineAsm(CI)) { -            BBI = BB.begin(); -            // Avoid processing instructions out of order, which could cause -            // reuse before a value is defined. -            SunkAddrs.clear(); -          } else -            // Sink address computing for memory operands into the block. -            MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs); -        } +      if (TLI && isa<InlineAsm>(CI->getCalledValue())) { +        if (TLI->ExpandInlineAsm(CI)) { +          BBI = BB.begin(); +          // Avoid processing instructions out of order, which could cause +          // reuse before a value is defined. +          SunkAddrs.clear(); +        } else +          // Sink address computing for memory operands into the block. 
+          MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs); +      }      }    } diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp index c85d0317d65f..5b573f492cdc 100644 --- a/lib/Transforms/Scalar/CondPropagate.cpp +++ b/lib/Transforms/Scalar/CondPropagate.cpp @@ -14,26 +14,21 @@  #define DEBUG_TYPE "condprop"  #include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h"  #include "llvm/Instructions.h"  #include "llvm/IntrinsicInst.h"  #include "llvm/Pass.h"  #include "llvm/Type.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Local.h" -#include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Streams.h"  using namespace llvm;  STATISTIC(NumBrThread, "Number of CFG edges threaded through branches");  STATISTIC(NumSwThread, "Number of CFG edges threaded through switches");  namespace { -  struct VISIBILITY_HIDDEN CondProp : public FunctionPass { +  struct CondProp : public FunctionPass {      static char ID; // Pass identification, replacement for typeid      CondProp() : FunctionPass(&ID) {} @@ -124,7 +119,7 @@ void CondProp::SimplifyBlock(BasicBlock *BB) {        // Succ is now dead, but we cannot delete it without potentially        // invalidating iterators elsewhere.  Just insert an unreachable        // instruction in it and delete this block later on. -      new UnreachableInst(Succ); +      new UnreachableInst(BB->getContext(), Succ);        DeadBlocks.push_back(Succ);        MadeChange = true;      } @@ -196,8 +191,6 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) {    if (&*BBI != SI)      return; -  bool RemovedPreds = false; -    // Ok, we have this really simple case, walk the PHI operands, looking for    // constants.  Walk from the end to remove operands from the end when    // possible, and to avoid invalidating "i". 
@@ -209,7 +202,6 @@ void CondProp::SimplifyPredecessors(SwitchInst *SI) {        RevectorBlockTo(PN->getIncomingBlock(i-1),                        SI->getSuccessor(DestCase));        ++NumSwThread; -      RemovedPreds = true;        // If there were two predecessors before this simplification, or if the        // PHI node contained all the same value except for the one we just diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index b933488cf636..4fee327ebec1 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -24,7 +24,6 @@  #include "llvm/Constant.h"  #include "llvm/Instruction.h"  #include "llvm/Pass.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/InstIterator.h"  #include "llvm/ADT/Statistic.h"  #include <set> @@ -33,7 +32,7 @@ using namespace llvm;  STATISTIC(NumInstKilled, "Number of instructions killed");  namespace { -  struct VISIBILITY_HIDDEN ConstantPropagation : public FunctionPass { +  struct ConstantPropagation : public FunctionPass {      static char ID; // Pass identification, replacement for typeid      ConstantPropagation() : FunctionPass(&ID) {} @@ -67,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {      WorkList.erase(WorkList.begin());    // Get an element from the worklist...      if (!I->use_empty())                 // Don't muck with dead instructions... -      if (Constant *C = ConstantFoldInstruction(I)) { +      if (Constant *C = ConstantFoldInstruction(I, F.getContext())) {          // Add all of the users of this instruction to the worklist, they might          // be constant propagatable now...          
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 8bb504c09c6e..39940c35da5d 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -21,7 +21,6 @@  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Instruction.h"  #include "llvm/Pass.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/InstIterator.h"  #include "llvm/ADT/Statistic.h"  #include <set> @@ -34,7 +33,7 @@ namespace {    //===--------------------------------------------------------------------===//    // DeadInstElimination pass implementation    // -  struct VISIBILITY_HIDDEN DeadInstElimination : public BasicBlockPass { +  struct DeadInstElimination : public BasicBlockPass {      static char ID; // Pass identification, replacement for typeid      DeadInstElimination() : BasicBlockPass(&ID) {}      virtual bool runOnBasicBlock(BasicBlock &BB) { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index b923c92bd300..a7b3e7524fa2 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -29,14 +29,15 @@  #include "llvm/Analysis/MemoryDependenceAnalysis.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Compiler.h"  using namespace llvm;  STATISTIC(NumFastStores, "Number of stores deleted");  STATISTIC(NumFastOther , "Number of other instrs removed");  namespace { -  struct VISIBILITY_HIDDEN DSE : public FunctionPass { +  struct DSE : public FunctionPass { +    TargetData *TD; +      static char ID; // Pass identification, replacement for typeid      DSE() : FunctionPass(&ID) {} @@ -62,7 +63,6 @@ namespace {      virtual void getAnalysisUsage(AnalysisUsage &AU) const {        AU.setPreservesCFG();        AU.addRequired<DominatorTree>(); -      AU.addRequired<TargetData>();        
AU.addRequired<AliasAnalysis>();        AU.addRequired<MemoryDependenceAnalysis>();        AU.addPreserved<DominatorTree>(); @@ -79,15 +79,15 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }  bool DSE::runOnBasicBlock(BasicBlock &BB) {    MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); -  TargetData &TD = getAnalysis<TargetData>();   +  TD = getAnalysisIfAvailable<TargetData>();    bool MadeChange = false; -  // Do a top-down walk on the BB +  // Do a top-down walk on the BB.    for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {      Instruction *Inst = BBI++; -    // If we find a store or a free, get it's memory dependence. +    // If we find a store or a free, get its memory dependence.      if (!isa<StoreInst>(Inst) && !isa<FreeInst>(Inst))        continue; @@ -117,13 +117,17 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {      // If this is a store-store dependence, then the previous store is dead so      // long as this store is at least as big as it.      if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst())) -      if (TD.getTypeStoreSize(DepStore->getOperand(0)->getType()) <= -          TD.getTypeStoreSize(SI->getOperand(0)->getType())) { +      if (TD && +          TD->getTypeStoreSize(DepStore->getOperand(0)->getType()) <= +          TD->getTypeStoreSize(SI->getOperand(0)->getType())) {          // Delete the store and now-dead instructions that feed it.          DeleteDeadInstruction(DepStore);          NumFastStores++;          MadeChange = true; -         + +        // DeleteDeadInstruction can delete the current instruction in loop +        // cases, reset BBI. 
+        BBI = Inst;          if (BBI != BB.begin())            --BBI;          continue; @@ -134,8 +138,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {      if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {        if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&            SI->getOperand(0) == DepLoad) { +        // DeleteDeadInstruction can delete the current instruction.  Save BBI +        // in case we need it. +        WeakVH NextInst(BBI); +                  DeleteDeadInstruction(SI); -        if (BBI != BB.begin()) +         +        if (NextInst == 0)  // Next instruction deleted. +          BBI = BB.begin(); +        else if (BBI != BB.begin())  // Revisit this instruction if possible.            --BBI;          NumFastStores++;          MadeChange = true; @@ -181,7 +192,6 @@ bool DSE::handleFreeWithNonTrivialDependency(FreeInst *F, MemDepResult Dep) {  /// store i32 1, i32* %A  /// ret void  bool DSE::handleEndBlock(BasicBlock &BB) { -  TargetData &TD = getAnalysis<TargetData>();    AliasAnalysis &AA = getAnalysis<AliasAnalysis>();    bool MadeChange = false; @@ -302,14 +312,16 @@ bool DSE::handleEndBlock(BasicBlock &BB) {          // Get size information for the alloca          unsigned pointerSize = ~0U; -        if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) { -          if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize())) -            pointerSize = C->getZExtValue() * -                          TD.getTypeAllocSize(A->getAllocatedType()); -        } else { -          const PointerType* PT = cast<PointerType>( -                                                 cast<Argument>(*I)->getType()); -          pointerSize = TD.getTypeAllocSize(PT->getElementType()); +        if (TD) { +          if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) { +            if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize())) +              pointerSize = C->getZExtValue() * +                            
TD->getTypeAllocSize(A->getAllocatedType()); +          } else { +            const PointerType* PT = cast<PointerType>( +                                                   cast<Argument>(*I)->getType()); +            pointerSize = TD->getTypeAllocSize(PT->getElementType()); +          }          }          // See if the call site touches it @@ -357,7 +369,6 @@ bool DSE::handleEndBlock(BasicBlock &BB) {  bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize,                                 BasicBlock::iterator &BBI,                                 SmallPtrSet<Value*, 64>& deadPointers) { -  TargetData &TD = getAnalysis<TargetData>();    AliasAnalysis &AA = getAnalysis<AliasAnalysis>();    // If the kill pointer can be easily reduced to an alloca, @@ -379,13 +390,15 @@ bool DSE::RemoveUndeadPointers(Value* killPointer, uint64_t killPointerSize,        E = deadPointers.end(); I != E; ++I) {      // Get size information for the alloca.      unsigned pointerSize = ~0U; -    if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) { -      if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize())) -        pointerSize = C->getZExtValue() * -                      TD.getTypeAllocSize(A->getAllocatedType()); -    } else { -      const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType()); -      pointerSize = TD.getTypeAllocSize(PT->getElementType()); +    if (TD) { +      if (AllocaInst* A = dyn_cast<AllocaInst>(*I)) { +        if (ConstantInt* C = dyn_cast<ConstantInt>(A->getArraySize())) +          pointerSize = C->getZExtValue() * +                        TD->getTypeAllocSize(A->getAllocatedType()); +      } else { +        const PointerType* PT = cast<PointerType>(cast<Argument>(*I)->getType()); +        pointerSize = TD->getTypeAllocSize(PT->getElementType()); +      }      }      // See if this pointer could alias it diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index f4fe15e0e525..2ed4a638adf4 100644 --- 
a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -23,6 +23,7 @@  #include "llvm/Function.h"  #include "llvm/IntrinsicInst.h"  #include "llvm/LLVMContext.h" +#include "llvm/Operator.h"  #include "llvm/Value.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/DepthFirstIterator.h" @@ -32,13 +33,18 @@  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/Dominators.h"  #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MallocHelper.h"  #include "llvm/Analysis/MemoryDependenceAnalysis.h"  #include "llvm/Support/CFG.h"  #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetData.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h"  #include <cstdio>  using namespace llvm; @@ -60,17 +66,17 @@ static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));  /// as an efficient mechanism to determine the expression-wise equivalence of  /// two values.  
namespace { -  struct VISIBILITY_HIDDEN Expression { +  struct Expression {      enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL,                              UDIV, SDIV, FDIV, UREM, SREM, -                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ,  -                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE,  -                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ,  -                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE,  -                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,  +                            FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, +                            ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, +                            ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, +                            FCMPOGT, FCMPOGE, FCMPOLT, FCMPOLE, FCMPONE, +                            FCMPORD, FCMPUNO, FCMPUEQ, FCMPUGT, FCMPUGE,                              FCMPULT, FCMPULE, FCMPUNE, EXTRACT, INSERT,                              SHUFFLE, SELECT, TRUNC, ZEXT, SEXT, FPTOUI, -                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,  +                            FPTOSI, UITOFP, SITOFP, FPTRUNC, FPEXT,                              PTRTOINT, INTTOPTR, BITCAST, GEP, CALL, CONSTANT,                              EMPTY, TOMBSTONE }; @@ -80,11 +86,11 @@ namespace {      uint32_t secondVN;      uint32_t thirdVN;      SmallVector<uint32_t, 4> varargs; -    Value* function; -   +    Value *function; +      Expression() { }      Expression(ExpressionOpcode o) : opcode(o) { } -   +      bool operator==(const Expression &other) const {        if (opcode != other.opcode)          return false; @@ -103,30 +109,30 @@ namespace {        else {          if (varargs.size() != other.varargs.size())            return false; -       +          for (size_t i = 0; i < varargs.size(); ++i)            if (varargs[i] != other.varargs[i])              return false; -     +          return 
true;        }      } -   +      bool operator!=(const Expression &other) const {        return !(*this == other);      }    }; -   -  class VISIBILITY_HIDDEN ValueTable { + +  class ValueTable {      private:        DenseMap<Value*, uint32_t> valueNumbering;        DenseMap<Expression, uint32_t> expressionNumbering;        AliasAnalysis* AA;        MemoryDependenceAnalysis* MD;        DominatorTree* DT; -   +        uint32_t nextValueNumber; -     +        Expression::ExpressionOpcode getOpcode(BinaryOperator* BO);        Expression::ExpressionOpcode getOpcode(CmpInst* C);        Expression::ExpressionOpcode getOpcode(CastInst* C); @@ -142,11 +148,11 @@ namespace {        Expression create_expression(Constant* C);      public:        ValueTable() : nextValueNumber(1) { } -      uint32_t lookup_or_add(Value* V); -      uint32_t lookup(Value* V) const; -      void add(Value* V, uint32_t num); +      uint32_t lookup_or_add(Value *V); +      uint32_t lookup(Value *V) const; +      void add(Value *V, uint32_t num);        void clear(); -      void erase(Value* v); +      void erase(Value *v);        unsigned size();        void setAliasAnalysis(AliasAnalysis* A) { AA = A; }        AliasAnalysis *getAliasAnalysis() const { return AA; } @@ -162,30 +168,30 @@ template <> struct DenseMapInfo<Expression> {    static inline Expression getEmptyKey() {      return Expression(Expression::EMPTY);    } -   +    static inline Expression getTombstoneKey() {      return Expression(Expression::TOMBSTONE);    } -   +    static unsigned getHashValue(const Expression e) {      unsigned hash = e.opcode; -     +      hash = e.firstVN + hash * 37;      hash = e.secondVN + hash * 37;      hash = e.thirdVN + hash * 37; -     +      hash = ((unsigned)((uintptr_t)e.type >> 4) ^              (unsigned)((uintptr_t)e.type >> 9)) +             hash * 37; -     +      for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),           E = e.varargs.end(); I != E; ++I)        hash = *I + 
hash * 37; -     +      hash = ((unsigned)((uintptr_t)e.function >> 4) ^              (unsigned)((uintptr_t)e.function >> 9)) +             hash * 37; -     +      return hash;    }    static bool isEqual(const Expression &LHS, const Expression &RHS) { @@ -201,7 +207,7 @@ template <> struct DenseMapInfo<Expression> {  Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {    switch(BO->getOpcode()) {    default: // THIS SHOULD NEVER HAPPEN -    assert(0 && "Binary operator with unknown opcode?"); +    llvm_unreachable("Binary operator with unknown opcode?");    case Instruction::Add:  return Expression::ADD;    case Instruction::FAdd: return Expression::FADD;    case Instruction::Sub:  return Expression::SUB; @@ -224,10 +230,10 @@ Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) {  }  Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) { -  if (isa<ICmpInst>(C) || isa<VICmpInst>(C)) { +  if (isa<ICmpInst>(C)) {      switch (C->getPredicate()) {      default:  // THIS SHOULD NEVER HAPPEN -      assert(0 && "Comparison with unknown predicate?"); +      llvm_unreachable("Comparison with unknown predicate?");      case ICmpInst::ICMP_EQ:  return Expression::ICMPEQ;      case ICmpInst::ICMP_NE:  return Expression::ICMPNE;      case ICmpInst::ICMP_UGT: return Expression::ICMPUGT; @@ -239,32 +245,32 @@ Expression::ExpressionOpcode ValueTable::getOpcode(CmpInst* C) {      case ICmpInst::ICMP_SLT: return Expression::ICMPSLT;      case ICmpInst::ICMP_SLE: return Expression::ICMPSLE;      } -  } -  assert((isa<FCmpInst>(C) || isa<VFCmpInst>(C)) && "Unknown compare"); -  switch (C->getPredicate()) { -  default: // THIS SHOULD NEVER HAPPEN -    assert(0 && "Comparison with unknown predicate?"); -  case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; -  case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; -  case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; -  case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; -  case 
FCmpInst::FCMP_OLE: return Expression::FCMPOLE; -  case FCmpInst::FCMP_ONE: return Expression::FCMPONE; -  case FCmpInst::FCMP_ORD: return Expression::FCMPORD; -  case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; -  case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; -  case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; -  case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; -  case FCmpInst::FCMP_ULT: return Expression::FCMPULT; -  case FCmpInst::FCMP_ULE: return Expression::FCMPULE; -  case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; +  } else { +    switch (C->getPredicate()) { +    default: // THIS SHOULD NEVER HAPPEN +      llvm_unreachable("Comparison with unknown predicate?"); +    case FCmpInst::FCMP_OEQ: return Expression::FCMPOEQ; +    case FCmpInst::FCMP_OGT: return Expression::FCMPOGT; +    case FCmpInst::FCMP_OGE: return Expression::FCMPOGE; +    case FCmpInst::FCMP_OLT: return Expression::FCMPOLT; +    case FCmpInst::FCMP_OLE: return Expression::FCMPOLE; +    case FCmpInst::FCMP_ONE: return Expression::FCMPONE; +    case FCmpInst::FCMP_ORD: return Expression::FCMPORD; +    case FCmpInst::FCMP_UNO: return Expression::FCMPUNO; +    case FCmpInst::FCMP_UEQ: return Expression::FCMPUEQ; +    case FCmpInst::FCMP_UGT: return Expression::FCMPUGT; +    case FCmpInst::FCMP_UGE: return Expression::FCMPUGE; +    case FCmpInst::FCMP_ULT: return Expression::FCMPULT; +    case FCmpInst::FCMP_ULE: return Expression::FCMPULE; +    case FCmpInst::FCMP_UNE: return Expression::FCMPUNE; +    }    }  }  Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {    switch(C->getOpcode()) {    default: // THIS SHOULD NEVER HAPPEN -    assert(0 && "Cast operator with unknown opcode?"); +    llvm_unreachable("Cast operator with unknown opcode?");    case Instruction::Trunc:    return Expression::TRUNC;    case Instruction::ZExt:     return Expression::ZEXT;    case Instruction::SExt:     return Expression::SEXT; @@ -282,126 +288,126 @@ 
Expression::ExpressionOpcode ValueTable::getOpcode(CastInst* C) {  Expression ValueTable::create_expression(CallInst* C) {    Expression e; -   +    e.type = C->getType();    e.firstVN = 0;    e.secondVN = 0;    e.thirdVN = 0;    e.function = C->getCalledFunction();    e.opcode = Expression::CALL; -   +    for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();         I != E; ++I)      e.varargs.push_back(lookup_or_add(*I)); -   +    return e;  }  Expression ValueTable::create_expression(BinaryOperator* BO) {    Expression e; -     +    e.firstVN = lookup_or_add(BO->getOperand(0));    e.secondVN = lookup_or_add(BO->getOperand(1));    e.thirdVN = 0;    e.function = 0;    e.type = BO->getType();    e.opcode = getOpcode(BO); -   +    return e;  }  Expression ValueTable::create_expression(CmpInst* C) {    Expression e; -     +    e.firstVN = lookup_or_add(C->getOperand(0));    e.secondVN = lookup_or_add(C->getOperand(1));    e.thirdVN = 0;    e.function = 0;    e.type = C->getType();    e.opcode = getOpcode(C); -   +    return e;  }  Expression ValueTable::create_expression(CastInst* C) {    Expression e; -     +    e.firstVN = lookup_or_add(C->getOperand(0));    e.secondVN = 0;    e.thirdVN = 0;    e.function = 0;    e.type = C->getType();    e.opcode = getOpcode(C); -   +    return e;  }  Expression ValueTable::create_expression(ShuffleVectorInst* S) {    Expression e; -     +    e.firstVN = lookup_or_add(S->getOperand(0));    e.secondVN = lookup_or_add(S->getOperand(1));    e.thirdVN = lookup_or_add(S->getOperand(2));    e.function = 0;    e.type = S->getType();    e.opcode = Expression::SHUFFLE; -   +    return e;  }  Expression ValueTable::create_expression(ExtractElementInst* E) {    Expression e; -     +    e.firstVN = lookup_or_add(E->getOperand(0));    e.secondVN = lookup_or_add(E->getOperand(1));    e.thirdVN = 0;    e.function = 0;    e.type = E->getType();    e.opcode = Expression::EXTRACT; -   +    return e;  }  Expression 
ValueTable::create_expression(InsertElementInst* I) {    Expression e; -     +    e.firstVN = lookup_or_add(I->getOperand(0));    e.secondVN = lookup_or_add(I->getOperand(1));    e.thirdVN = lookup_or_add(I->getOperand(2));    e.function = 0;    e.type = I->getType();    e.opcode = Expression::INSERT; -   +    return e;  }  Expression ValueTable::create_expression(SelectInst* I) {    Expression e; -     +    e.firstVN = lookup_or_add(I->getCondition());    e.secondVN = lookup_or_add(I->getTrueValue());    e.thirdVN = lookup_or_add(I->getFalseValue());    e.function = 0;    e.type = I->getType();    e.opcode = Expression::SELECT; -   +    return e;  }  Expression ValueTable::create_expression(GetElementPtrInst* G) {    Expression e; -   +    e.firstVN = lookup_or_add(G->getPointerOperand());    e.secondVN = 0;    e.thirdVN = 0;    e.function = 0;    e.type = G->getType();    e.opcode = Expression::GEP; -   +    for (GetElementPtrInst::op_iterator I = G->idx_begin(), E = G->idx_end();         I != E; ++I)      e.varargs.push_back(lookup_or_add(*I)); -   +    return e;  } @@ -410,21 +416,21 @@ Expression ValueTable::create_expression(GetElementPtrInst* G) {  //===----------------------------------------------------------------------===//  /// add - Insert a value into the table with a specified value number. -void ValueTable::add(Value* V, uint32_t num) { +void ValueTable::add(Value *V, uint32_t num) {    valueNumbering.insert(std::make_pair(V, num));  }  /// lookup_or_add - Returns the value number for the specified value, assigning  /// it a new number if it did not have one before. 
-uint32_t ValueTable::lookup_or_add(Value* V) { +uint32_t ValueTable::lookup_or_add(Value *V) {    DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);    if (VI != valueNumbering.end())      return VI->second; -   +    if (CallInst* C = dyn_cast<CallInst>(V)) {      if (AA->doesNotAccessMemory(C)) {        Expression e = create_expression(C); -     +        DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);        if (EI != expressionNumbering.end()) {          valueNumbering.insert(std::make_pair(V, EI->second)); @@ -432,20 +438,20 @@ uint32_t ValueTable::lookup_or_add(Value* V) {        } else {          expressionNumbering.insert(std::make_pair(e, nextValueNumber));          valueNumbering.insert(std::make_pair(V, nextValueNumber)); -       +          return nextValueNumber++;        }      } else if (AA->onlyReadsMemory(C)) {        Expression e = create_expression(C); -       +        if (expressionNumbering.find(e) == expressionNumbering.end()) {          expressionNumbering.insert(std::make_pair(e, nextValueNumber));          valueNumbering.insert(std::make_pair(V, nextValueNumber));          return nextValueNumber++;        } -       +        MemDepResult local_dep = MD->getDependency(C); -       +        if (!local_dep.isDef() && !local_dep.isNonLocal()) {          valueNumbering.insert(std::make_pair(V, nextValueNumber));          return nextValueNumber++; @@ -453,12 +459,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {        if (local_dep.isDef()) {          CallInst* local_cdep = cast<CallInst>(local_dep.getInst()); -         +          if (local_cdep->getNumOperands() != C->getNumOperands()) {            valueNumbering.insert(std::make_pair(V, nextValueNumber));            return nextValueNumber++;          } -           +          for (unsigned i = 1; i < C->getNumOperands(); ++i) {            uint32_t c_vn = lookup_or_add(C->getOperand(i));            uint32_t cd_vn = 
lookup_or_add(local_cdep->getOperand(i)); @@ -467,19 +473,19 @@ uint32_t ValueTable::lookup_or_add(Value* V) {              return nextValueNumber++;            }          } -       +          uint32_t v = lookup_or_add(local_cdep);          valueNumbering.insert(std::make_pair(V, v));          return v;        }        // Non-local case. -      const MemoryDependenceAnalysis::NonLocalDepInfo &deps =  +      const MemoryDependenceAnalysis::NonLocalDepInfo &deps =          MD->getNonLocalCallDependency(CallSite(C));        // FIXME: call/call dependencies for readonly calls should return def, not        // clobber!  Move the checking logic to MemDep!        CallInst* cdep = 0; -       +        // Check to see if we have a single dominating call instruction that is        // identical to C.        for (unsigned i = 0, e = deps.size(); i != e; ++i) { @@ -494,23 +500,23 @@ uint32_t ValueTable::lookup_or_add(Value* V) {            cdep = 0;            break;          } -         +          CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());          // FIXME: All duplicated with non-local case.          
if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){            cdep = NonLocalDepCall;            continue;          } -         +          cdep = 0;          break;        } -       +        if (!cdep) {          valueNumbering.insert(std::make_pair(V, nextValueNumber));          return nextValueNumber++;        } -       +        if (cdep->getNumOperands() != C->getNumOperands()) {          valueNumbering.insert(std::make_pair(V, nextValueNumber));          return nextValueNumber++; @@ -523,18 +529,18 @@ uint32_t ValueTable::lookup_or_add(Value* V) {            return nextValueNumber++;          }        } -       +        uint32_t v = lookup_or_add(cdep);        valueNumbering.insert(std::make_pair(V, v));        return v; -       +      } else {        valueNumbering.insert(std::make_pair(V, nextValueNumber));        return nextValueNumber++;      }    } else if (BinaryOperator* BO = dyn_cast<BinaryOperator>(V)) {      Expression e = create_expression(BO); -     +      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);      if (EI != expressionNumbering.end()) {        valueNumbering.insert(std::make_pair(V, EI->second)); @@ -542,12 +548,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {      } else {        expressionNumbering.insert(std::make_pair(e, nextValueNumber));        valueNumbering.insert(std::make_pair(V, nextValueNumber)); -       +        return nextValueNumber++;      }    } else if (CmpInst* C = dyn_cast<CmpInst>(V)) {      Expression e = create_expression(C); -     +      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);      if (EI != expressionNumbering.end()) {        valueNumbering.insert(std::make_pair(V, EI->second)); @@ -555,12 +561,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {      } else {        expressionNumbering.insert(std::make_pair(e, nextValueNumber));        valueNumbering.insert(std::make_pair(V, nextValueNumber)); -       +        return 
nextValueNumber++;      }    } else if (ShuffleVectorInst* U = dyn_cast<ShuffleVectorInst>(V)) {      Expression e = create_expression(U); -     +      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);      if (EI != expressionNumbering.end()) {        valueNumbering.insert(std::make_pair(V, EI->second)); @@ -568,12 +574,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {      } else {        expressionNumbering.insert(std::make_pair(e, nextValueNumber));        valueNumbering.insert(std::make_pair(V, nextValueNumber)); -       +        return nextValueNumber++;      }    } else if (ExtractElementInst* U = dyn_cast<ExtractElementInst>(V)) {      Expression e = create_expression(U); -     +      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);      if (EI != expressionNumbering.end()) {        valueNumbering.insert(std::make_pair(V, EI->second)); @@ -581,12 +587,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {      } else {        expressionNumbering.insert(std::make_pair(e, nextValueNumber));        valueNumbering.insert(std::make_pair(V, nextValueNumber)); -       +        return nextValueNumber++;      }    } else if (InsertElementInst* U = dyn_cast<InsertElementInst>(V)) {      Expression e = create_expression(U); -     +      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);      if (EI != expressionNumbering.end()) {        valueNumbering.insert(std::make_pair(V, EI->second)); @@ -594,12 +600,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {      } else {        expressionNumbering.insert(std::make_pair(e, nextValueNumber));        valueNumbering.insert(std::make_pair(V, nextValueNumber)); -       +        return nextValueNumber++;      }    } else if (SelectInst* U = dyn_cast<SelectInst>(V)) {      Expression e = create_expression(U); -     +      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);      if (EI != expressionNumbering.end()) {        
valueNumbering.insert(std::make_pair(V, EI->second)); @@ -607,12 +613,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {      } else {        expressionNumbering.insert(std::make_pair(e, nextValueNumber));        valueNumbering.insert(std::make_pair(V, nextValueNumber)); -       +        return nextValueNumber++;      }    } else if (CastInst* U = dyn_cast<CastInst>(V)) {      Expression e = create_expression(U); -     +      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);      if (EI != expressionNumbering.end()) {        valueNumbering.insert(std::make_pair(V, EI->second)); @@ -620,12 +626,12 @@ uint32_t ValueTable::lookup_or_add(Value* V) {      } else {        expressionNumbering.insert(std::make_pair(e, nextValueNumber));        valueNumbering.insert(std::make_pair(V, nextValueNumber)); -       +        return nextValueNumber++;      }    } else if (GetElementPtrInst* U = dyn_cast<GetElementPtrInst>(V)) {      Expression e = create_expression(U); -     +      DenseMap<Expression, uint32_t>::iterator EI = expressionNumbering.find(e);      if (EI != expressionNumbering.end()) {        valueNumbering.insert(std::make_pair(V, EI->second)); @@ -633,7 +639,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) {      } else {        expressionNumbering.insert(std::make_pair(e, nextValueNumber));        valueNumbering.insert(std::make_pair(V, nextValueNumber)); -       +        return nextValueNumber++;      }    } else { @@ -644,7 +650,7 @@ uint32_t ValueTable::lookup_or_add(Value* V) {  /// lookup - Returns the value number of the specified value. Fails if  /// the value has not yet been numbered. 
-uint32_t ValueTable::lookup(Value* V) const { +uint32_t ValueTable::lookup(Value *V) const {    DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);    assert(VI != valueNumbering.end() && "Value not numbered?");    return VI->second; @@ -658,7 +664,7 @@ void ValueTable::clear() {  }  /// erase - Remove a value from the value numbering -void ValueTable::erase(Value* V) { +void ValueTable::erase(Value *V) {    valueNumbering.erase(V);  } @@ -676,17 +682,17 @@ void ValueTable::verifyRemoved(const Value *V) const {  //===----------------------------------------------------------------------===//  namespace { -  struct VISIBILITY_HIDDEN ValueNumberScope { +  struct ValueNumberScope {      ValueNumberScope* parent;      DenseMap<uint32_t, Value*> table; -     +      ValueNumberScope(ValueNumberScope* p) : parent(p) { }    };  }  namespace { -  class VISIBILITY_HIDDEN GVN : public FunctionPass { +  class GVN : public FunctionPass {      bool runOnFunction(Function &F);    public:      static char ID; // Pass identification, replacement for typeid @@ -698,45 +704,35 @@ namespace {      ValueTable VN;      DenseMap<BasicBlock*, ValueNumberScope*> localAvail; -     -    typedef DenseMap<Value*, SmallPtrSet<Instruction*, 4> > PhiMapType; -    PhiMapType phiMap; -     -     +      // This transformation requires dominator postdominator info      virtual void getAnalysisUsage(AnalysisUsage &AU) const {        AU.addRequired<DominatorTree>();        AU.addRequired<MemoryDependenceAnalysis>();        AU.addRequired<AliasAnalysis>(); -       +        AU.addPreserved<DominatorTree>();        AU.addPreserved<AliasAnalysis>();      } -   +      // Helper fuctions      // FIXME: eliminate or document these better      bool processLoad(LoadInst* L,                       SmallVectorImpl<Instruction*> &toErase); -    bool processInstruction(Instruction* I, +    bool processInstruction(Instruction *I,                              SmallVectorImpl<Instruction*> &toErase);     
 bool processNonLocalLoad(LoadInst* L,                               SmallVectorImpl<Instruction*> &toErase); -    bool processBlock(BasicBlock* BB); -    Value *GetValueForBlock(BasicBlock *BB, Instruction* orig, -                            DenseMap<BasicBlock*, Value*> &Phis, -                            bool top_level = false); +    bool processBlock(BasicBlock *BB);      void dump(DenseMap<uint32_t, Value*>& d);      bool iterateOnFunction(Function &F); -    Value* CollapsePhi(PHINode* p); -    bool isSafeReplacement(PHINode* p, Instruction* inst); +    Value *CollapsePhi(PHINode* p);      bool performPRE(Function& F); -    Value* lookupNumber(BasicBlock* BB, uint32_t num); -    bool mergeBlockIntoPredecessor(BasicBlock* BB); -    Value* AttemptRedundancyElimination(Instruction* orig, unsigned valno); +    Value *lookupNumber(BasicBlock *BB, uint32_t num);      void cleanupGlobalSets();      void verifyRemoved(const Instruction *I) const;    }; -   +    char GVN::ID = 0;  } @@ -756,107 +752,31 @@ void GVN::dump(DenseMap<uint32_t, Value*>& d) {    printf("}\n");  } -Value* GVN::CollapsePhi(PHINode* p) { -  Value* constVal = p->hasConstantValue(); -  if (!constVal) return 0; -   -  Instruction* inst = dyn_cast<Instruction>(constVal); -  if (!inst) -    return constVal; -     -  if (DT->dominates(inst, p)) -    if (isSafeReplacement(p, inst)) -      return inst; -  return 0; -} - -bool GVN::isSafeReplacement(PHINode* p, Instruction* inst) { +static bool isSafeReplacement(PHINode* p, Instruction *inst) {    if (!isa<PHINode>(inst))      return true; -   +    for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();         UI != E; ++UI)      if (PHINode* use_phi = dyn_cast<PHINode>(UI))        if (use_phi->getParent() == inst->getParent())          return false; -   +    return true;  } -/// GetValueForBlock - Get the value to use within the specified basic block. -/// available values are in Phis. 
-Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig, -                             DenseMap<BasicBlock*, Value*> &Phis, -                             bool top_level) {  -                                  -  // If we have already computed this value, return the previously computed val. -  DenseMap<BasicBlock*, Value*>::iterator V = Phis.find(BB); -  if (V != Phis.end() && !top_level) return V->second; -   -  // If the block is unreachable, just return undef, since this path -  // can't actually occur at runtime. -  if (!DT->isReachableFromEntry(BB)) -    return Phis[BB] = Context->getUndef(orig->getType()); -   -  if (BasicBlock *Pred = BB->getSinglePredecessor()) { -    Value *ret = GetValueForBlock(Pred, orig, Phis); -    Phis[BB] = ret; -    return ret; -  } +Value *GVN::CollapsePhi(PHINode *PN) { +  Value *ConstVal = PN->hasConstantValue(DT); +  if (!ConstVal) return 0; -  // Get the number of predecessors of this block so we can reserve space later. -  // If there is already a PHI in it, use the #preds from it, otherwise count. -  // Getting it from the PHI is constant time. -  unsigned NumPreds; -  if (PHINode *ExistingPN = dyn_cast<PHINode>(BB->begin())) -    NumPreds = ExistingPN->getNumIncomingValues(); -  else -    NumPreds = std::distance(pred_begin(BB), pred_end(BB)); -   -  // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so -  // now, then get values to fill in the incoming values for the PHI. -  PHINode *PN = PHINode::Create(orig->getType(), orig->getName()+".rle", -                                BB->begin()); -  PN->reserveOperandSpace(NumPreds); -   -  Phis.insert(std::make_pair(BB, PN)); -   -  // Fill in the incoming values for the block. 
-  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { -    Value* val = GetValueForBlock(*PI, orig, Phis); -    PN->addIncoming(val, *PI); -  } -   -  VN.getAliasAnalysis()->copyValue(orig, PN); -   -  // Attempt to collapse PHI nodes that are trivially redundant -  Value* v = CollapsePhi(PN); -  if (!v) { -    // Cache our phi construction results -    if (LoadInst* L = dyn_cast<LoadInst>(orig)) -      phiMap[L->getPointerOperand()].insert(PN); -    else -      phiMap[orig].insert(PN); -     -    return PN; -  } -     -  PN->replaceAllUsesWith(v); -  if (isa<PointerType>(v->getType())) -    MD->invalidateCachedPointerInfo(v); - -  for (DenseMap<BasicBlock*, Value*>::iterator I = Phis.begin(), -       E = Phis.end(); I != E; ++I) -    if (I->second == PN) -      I->second = v; - -  DEBUG(cerr << "GVN removed: " << *PN); -  MD->removeInstruction(PN); -  PN->eraseFromParent(); -  DEBUG(verifyRemoved(PN)); - -  Phis[BB] = v; -  return v; +  Instruction *Inst = dyn_cast<Instruction>(ConstVal); +  if (!Inst) +    return ConstVal; + +  if (DT->dominates(Inst, PN)) +    if (isSafeReplacement(PN, Inst)) +      return Inst; +  return 0;  }  /// IsValueFullyAvailableInBlock - Return true if we can prove that the value @@ -869,11 +789,11 @@ Value *GVN::GetValueForBlock(BasicBlock *BB, Instruction* orig,  ///      currently speculating that it will be.  ///   3) we are speculating for this block and have used that to speculate for  ///      other blocks. -static bool IsValueFullyAvailableInBlock(BasicBlock *BB,  +static bool IsValueFullyAvailableInBlock(BasicBlock *BB,                              DenseMap<BasicBlock*, char> &FullyAvailableBlocks) {    // Optimistically assume that the block is fully available and check to see    // if we already know about this block in one lookup. 
-  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =  +  std::pair<DenseMap<BasicBlock*, char>::iterator, char> IV =      FullyAvailableBlocks.insert(std::make_pair(BB, 2));    // If the entry already existed for this block, return the precomputed value. @@ -884,29 +804,29 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB,        IV.first->second = 3;      return IV.first->second != 0;    } -   +    // Otherwise, see if it is fully available in all predecessors.    pred_iterator PI = pred_begin(BB), PE = pred_end(BB); -   +    // If this block has no predecessors, it isn't live-in here.    if (PI == PE)      goto SpeculationFailure; -   +    for (; PI != PE; ++PI)      // If the value isn't fully available in one of our predecessors, then it      // isn't fully available in this block either.  Undo our previous      // optimistic assumption and bail out.      if (!IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))        goto SpeculationFailure; -   +    return true; -   +  // SpeculationFailure - If we get here, we found out that this is not, after  // all, a fully-available block.  We have a problem if we speculated on this and  // used the speculation to mark other blocks as available.  SpeculationFailure:    char &BBVal = FullyAvailableBlocks[BB]; -   +    // If we didn't speculate on this, just return with it set to false.    if (BBVal == 2) {      BBVal = 0; @@ -918,7 +838,7 @@ SpeculationFailure:    // 0 if set to one.    SmallVector<BasicBlock*, 32> BBWorklist;    BBWorklist.push_back(BB); -   +    while (!BBWorklist.empty()) {      BasicBlock *Entry = BBWorklist.pop_back_val();      // Note that this sets blocks to 0 (unavailable) if they happen to not @@ -928,24 +848,372 @@ SpeculationFailure:      // Mark as unavailable.      
EntryVal = 0; -     +      for (succ_iterator I = succ_begin(Entry), E = succ_end(Entry); I != E; ++I)        BBWorklist.push_back(*I);    } -   +    return false;  } + +/// CanCoerceMustAliasedValueToLoad - Return true if +/// CoerceAvailableValueToLoadType will succeed. +static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal, +                                            const Type *LoadTy, +                                            const TargetData &TD) { +  // If the loaded or stored value is an first class array or struct, don't try +  // to transform them.  We need to be able to bitcast to integer. +  if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) || +      isa<StructType>(StoredVal->getType()) || +      isa<ArrayType>(StoredVal->getType())) +    return false; +   +  // The store has to be at least as big as the load. +  if (TD.getTypeSizeInBits(StoredVal->getType()) < +        TD.getTypeSizeInBits(LoadTy)) +    return false; +   +  return true; +} +   + +/// CoerceAvailableValueToLoadType - If we saw a store of a value to memory, and +/// then a load from a must-aliased pointer of a different type, try to coerce +/// the stored value.  LoadedTy is the type of the load we want to replace and +/// InsertPt is the place to insert new instructions. +/// +/// If we can't do it, return null. +static Value *CoerceAvailableValueToLoadType(Value *StoredVal,  +                                             const Type *LoadedTy, +                                             Instruction *InsertPt, +                                             const TargetData &TD) { +  if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) +    return 0; +   +  const Type *StoredValTy = StoredVal->getType(); +   +  uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy); +  uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); +   +  // If the store and reload are the same size, we can always reuse it. 
+  if (StoreSize == LoadSize) { +    if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) { +      // Pointer to Pointer -> use bitcast. +      return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); +    } +     +    // Convert source pointers to integers, which can be bitcast. +    if (isa<PointerType>(StoredValTy)) { +      StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); +      StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); +    } +     +    const Type *TypeToCastTo = LoadedTy; +    if (isa<PointerType>(TypeToCastTo)) +      TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext()); +     +    if (StoredValTy != TypeToCastTo) +      StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt); +     +    // Cast to pointer if the load needs a pointer type. +    if (isa<PointerType>(LoadedTy)) +      StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt); +     +    return StoredVal; +  } +   +  // If the loaded value is smaller than the available value, then we can +  // extract out a piece from it.  If the available value is too small, then we +  // can't do anything. +  assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail"); +   +  // Convert source pointers to integers, which can be manipulated. +  if (isa<PointerType>(StoredValTy)) { +    StoredValTy = TD.getIntPtrType(StoredValTy->getContext()); +    StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt); +  } +   +  // Convert vectors and fp to integer, which can be manipulated. +  if (!isa<IntegerType>(StoredValTy)) { +    StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize); +    StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt); +  } +   +  // If this is a big-endian system, we need to shift the value down to the low +  // bits so that a truncate will work. 
+  if (TD.isBigEndian()) { +    Constant *Val = ConstantInt::get(StoredVal->getType(), StoreSize-LoadSize); +    StoredVal = BinaryOperator::CreateLShr(StoredVal, Val, "tmp", InsertPt); +  } +   +  // Truncate the integer to the right size now. +  const Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadSize); +  StoredVal = new TruncInst(StoredVal, NewIntTy, "trunc", InsertPt); +   +  if (LoadedTy == NewIntTy) +    return StoredVal; +   +  // If the result is a pointer, inttoptr. +  if (isa<PointerType>(LoadedTy)) +    return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt); +   +  // Otherwise, bitcast. +  return new BitCastInst(StoredVal, LoadedTy, "bitcast", InsertPt); +} + +/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can +/// be expressed as a base pointer plus a constant offset.  Return the base and +/// offset to the caller. +static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset, +                                        const TargetData &TD) { +  Operator *PtrOp = dyn_cast<Operator>(Ptr); +  if (PtrOp == 0) return Ptr; +   +  // Just look through bitcasts. +  if (PtrOp->getOpcode() == Instruction::BitCast) +    return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); +   +  // If this is a GEP with constant indices, we can look through it. +  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp); +  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; +   +  gep_type_iterator GTI = gep_type_begin(GEP); +  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; +       ++I, ++GTI) { +    ConstantInt *OpC = cast<ConstantInt>(*I); +    if (OpC->isZero()) continue; +     +    // Handle a struct and array indices which add their offset to the pointer. 
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) { +      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); +    } else { +      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); +      Offset += OpC->getSExtValue()*Size; +    } +  } +   +  // Re-sign extend from the pointer size if needed to get overflow edge cases +  // right. +  unsigned PtrSize = TD.getPointerSizeInBits(); +  if (PtrSize < 64) +    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize); +   +  return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); +} + + +/// AnalyzeLoadFromClobberingStore - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store.  This means +/// that the store *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias.  Check this case to see if there is +/// anything more we can do before we give up.  This returns -1 if we have to +/// give up, or a byte number in the stored value of the piece that feeds the +/// load. +static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI, +                                          const TargetData &TD) { +  // If the loaded or stored value is an first class array or struct, don't try +  // to transform them.  We need to be able to bitcast to integer. +  if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) || +      isa<StructType>(DepSI->getOperand(0)->getType()) || +      isa<ArrayType>(DepSI->getOperand(0)->getType())) +    return -1; +   +  int64_t StoreOffset = 0, LoadOffset = 0; +  Value *StoreBase =  +    GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD); +  Value *LoadBase =  +    GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD); +  if (StoreBase != LoadBase) +    return -1; +   +  // If the load and store are to the exact same address, they should have been +  // a must alias.  AA must have gotten confused. 
+  // FIXME: Study to see if/when this happens. +  if (LoadOffset == StoreOffset) { +#if 0 +    errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n" +    << "Base       = " << *StoreBase << "\n" +    << "Store Ptr  = " << *DepSI->getPointerOperand() << "\n" +    << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" +    << "Load Ptr   = " << *L->getPointerOperand() << "\n" +    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n"; +    errs() << "'" << L->getParent()->getParent()->getName() << "'" +    << *L->getParent(); +#endif +    return -1; +  } +   +  // If the load and store don't overlap at all, the store doesn't provide +  // anything to the load.  In this case, they really don't alias at all, AA +  // must have gotten confused. +  // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then +  // remove this check, as it is duplicated with what we have below. +  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType()); +  uint64_t LoadSize = TD.getTypeSizeInBits(L->getType()); +   +  if ((StoreSize & 7) | (LoadSize & 7)) +    return -1; +  StoreSize >>= 3;  // Convert to bytes. 
+  LoadSize >>= 3; +   +   +  bool isAAFailure = false; +  if (StoreOffset < LoadOffset) { +    isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset; +  } else { +    isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset; +  } +  if (isAAFailure) { +#if 0 +    errs() << "STORE LOAD DEP WITH COMMON BASE:\n" +    << "Base       = " << *StoreBase << "\n" +    << "Store Ptr  = " << *DepSI->getPointerOperand() << "\n" +    << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n" +    << "Load Ptr   = " << *L->getPointerOperand() << "\n" +    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n"; +    errs() << "'" << L->getParent()->getParent()->getName() << "'" +    << *L->getParent(); +#endif +    return -1; +  } +   +  // If the Load isn't completely contained within the stored bits, we don't +  // have all the bits to feed it.  We could do something crazy in the future +  // (issue a smaller load then merge the bits in) but this seems unlikely to be +  // valuable. +  if (StoreOffset > LoadOffset || +      StoreOffset+StoreSize < LoadOffset+LoadSize) +    return -1; +   +  // Okay, we can do this transformation.  Return the number of bytes into the +  // store that the load is. +  return LoadOffset-StoreOffset; +}   + + +/// GetStoreValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering store.  This means +/// that the store *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias.  Check this case to see if there is +/// anything more we can do before we give up. 
+static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, +                                   const Type *LoadTy, +                                   Instruction *InsertPt, const TargetData &TD){ +  LLVMContext &Ctx = SrcVal->getType()->getContext(); +   +  uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8; +  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8; +   +   +  // Compute which bits of the stored value are being used by the load.  Convert +  // to an integer type to start with. +  if (isa<PointerType>(SrcVal->getType())) +    SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt); +  if (!isa<IntegerType>(SrcVal->getType())) +    SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8), +                             "tmp", InsertPt); +   +  // Shift the bits to the least significant depending on endianness. +  unsigned ShiftAmt; +  if (TD.isLittleEndian()) { +    ShiftAmt = Offset*8; +  } else { +    ShiftAmt = (StoreSize-LoadSize-Offset)*8; +  } +   +  if (ShiftAmt) +    SrcVal = BinaryOperator::CreateLShr(SrcVal, +                ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt); +   +  if (LoadSize != StoreSize) +    SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8), +                           "tmp", InsertPt); +   +  return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); +} + +struct AvailableValueInBlock { +  /// BB - The basic block in question. +  BasicBlock *BB; +  /// V - The value that is live out of the block. +  Value *V; +  /// Offset - The byte offset in V that is interesting for the load query. 
+  unsigned Offset; +   +  static AvailableValueInBlock get(BasicBlock *BB, Value *V, +                                   unsigned Offset = 0) { +    AvailableValueInBlock Res; +    Res.BB = BB; +    Res.V = V; +    Res.Offset = Offset; +    return Res; +  } +}; + +/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, +/// construct SSA form, allowing us to eliminate LI.  This returns the value +/// that should be used at LI's definition site. +static Value *ConstructSSAForLoadSet(LoadInst *LI,  +                         SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, +                                     const TargetData *TD, +                                     AliasAnalysis *AA) { +  SmallVector<PHINode*, 8> NewPHIs; +  SSAUpdater SSAUpdate(&NewPHIs); +  SSAUpdate.Initialize(LI); +   +  const Type *LoadTy = LI->getType(); +   +  for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { +    BasicBlock *BB = ValuesPerBlock[i].BB; +    Value *AvailableVal = ValuesPerBlock[i].V; +    unsigned Offset = ValuesPerBlock[i].Offset; +     +    if (SSAUpdate.HasValueForBlock(BB)) +      continue; +     +    if (AvailableVal->getType() != LoadTy) { +      assert(TD && "Need target data to handle type mismatch case"); +      AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy, +                                          BB->getTerminator(), *TD); +       +      if (Offset) { +        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" +              << *ValuesPerBlock[i].V << '\n' +              << *AvailableVal << '\n' << "\n\n\n"); +      } +       +       +      DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n" +            << *ValuesPerBlock[i].V << '\n' +            << *AvailableVal << '\n' << "\n\n\n"); +    } +     +    SSAUpdate.AddAvailableValue(BB, AvailableVal); +  } +   +  // Perform PHI construction. 
+  Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); +   +  // If new PHI nodes were created, notify alias analysis. +  if (isa<PointerType>(V->getType())) +    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) +      AA->copyValue(LI, NewPHIs[i]); + +  return V; +} +  /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are  /// non-local by performing PHI construction.  bool GVN::processNonLocalLoad(LoadInst *LI,                                SmallVectorImpl<Instruction*> &toErase) {    // Find the non-local dependencies of the load. -  SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;  +  SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;    MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),                                     Deps); -  //DEBUG(cerr << "INVESTIGATING NONLOCAL LOAD: " << Deps.size() << *LI); -   +  //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: " +  //             << Deps.size() << *LI << '\n'); +    // If we had to process more than one hundred blocks to find the    // dependencies, this load isn't worth worrying about.  Optimizing    // it will be too expensive. @@ -956,106 +1224,124 @@ bool GVN::processNonLocalLoad(LoadInst *LI,    // clobber in the current block.  Reject this early.    if (Deps.size() == 1 && Deps[0].second.isClobber()) {      DEBUG( -      DOUT << "GVN: non-local load "; -      WriteAsOperand(*DOUT.stream(), LI); -      DOUT << " is clobbered by " << *Deps[0].second.getInst(); +      errs() << "GVN: non-local load "; +      WriteAsOperand(errs(), LI); +      errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n';      );      return false;    } -   +    // Filter out useless results (non-locals, etc).  
Keep track of the blocks    // where we have a value available in repl, also keep track of whether we see    // dependencies that produce an unknown value for the load (such as a call    // that could potentially clobber the load). -  SmallVector<std::pair<BasicBlock*, Value*>, 16> ValuesPerBlock; +  SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;    SmallVector<BasicBlock*, 16> UnavailableBlocks; + +  const TargetData *TD = 0;    for (unsigned i = 0, e = Deps.size(); i != e; ++i) {      BasicBlock *DepBB = Deps[i].first;      MemDepResult DepInfo = Deps[i].second; -     +      if (DepInfo.isClobber()) { +      // If the dependence is to a store that writes to a superset of the bits +      // read by the load, we can extract the bits we need for the load from the +      // stored value. +      if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) { +        if (TD == 0) +          TD = getAnalysisIfAvailable<TargetData>(); +        if (TD) { +          int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD); +          if (Offset != -1) { +            ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, +                                                           DepSI->getOperand(0), +                                                                Offset)); +            continue; +          } +        } +      } +       +      // FIXME: Handle memset/memcpy.        UnavailableBlocks.push_back(DepBB);        continue;      } -     +      Instruction *DepInst = DepInfo.getInst(); -     +      // Loading the allocation -> undef. 
-    if (isa<AllocationInst>(DepInst)) { -      ValuesPerBlock.push_back(std::make_pair(DepBB,  -                                            Context->getUndef(LI->getType()))); +    if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) { +      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, +                                             UndefValue::get(LI->getType())));        continue;      } -   -    if (StoreInst* S = dyn_cast<StoreInst>(DepInst)) { -      // Reject loads and stores that are to the same address but are of  -      // different types. -      // NOTE: 403.gcc does have this case (e.g. in readonly_fields_p) because -      // of bitfield access, it would be interesting to optimize for it at some -      // point. + +    if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { +      // Reject loads and stores that are to the same address but are of +      // different types if we have to.        if (S->getOperand(0)->getType() != LI->getType()) { -        UnavailableBlocks.push_back(DepBB); -        continue; +        if (TD == 0) +          TD = getAnalysisIfAvailable<TargetData>(); +         +        // If the stored value is larger or equal to the loaded value, we can +        // reuse it. +        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(S->getOperand(0), +                                                        LI->getType(), *TD)) { +          UnavailableBlocks.push_back(DepBB); +          continue; +        }        } -       -      ValuesPerBlock.push_back(std::make_pair(DepBB, S->getOperand(0))); -       -    } else if (LoadInst* LD = dyn_cast<LoadInst>(DepInst)) { + +      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, +                                                          S->getOperand(0))); +      continue; +    } +     +    if (LoadInst *LD = dyn_cast<LoadInst>(DepInst)) { +      // If the types mismatch and we can't handle it, reject reuse of the load.        
if (LD->getType() != LI->getType()) { -        UnavailableBlocks.push_back(DepBB); -        continue; +        if (TD == 0) +          TD = getAnalysisIfAvailable<TargetData>(); +         +        // If the stored value is larger or equal to the loaded value, we can +        // reuse it. +        if (TD == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*TD)){ +          UnavailableBlocks.push_back(DepBB); +          continue; +        }                  } -      ValuesPerBlock.push_back(std::make_pair(DepBB, LD)); -    } else { -      UnavailableBlocks.push_back(DepBB); +      ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD));        continue;      } +     +    UnavailableBlocks.push_back(DepBB); +    continue;    } -   +    // If we have no predecessors that produce a known value for this load, exit    // early.    if (ValuesPerBlock.empty()) return false; -   +    // If all of the instructions we depend on produce a known value for this    // load, then it is fully redundant and we can use PHI insertion to compute    // its value.  Insert PHIs and remove the fully redundant value now.    if (UnavailableBlocks.empty()) { -    // Use cached PHI construction information from previous runs -    SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()]; -    // FIXME: What does phiMap do? Are we positive it isn't getting invalidated? 
-    for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); -         I != E; ++I) { -      if ((*I)->getParent() == LI->getParent()) { -        DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD #1: " << *LI); -        LI->replaceAllUsesWith(*I); -        if (isa<PointerType>((*I)->getType())) -          MD->invalidateCachedPointerInfo(*I); -        toErase.push_back(LI); -        NumGVNLoad++; -        return true; -      } -       -      ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); -    } -     -    DEBUG(cerr << "GVN REMOVING NONLOCAL LOAD: " << *LI); +    DEBUG(errs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); -    DenseMap<BasicBlock*, Value*> BlockReplValues; -    BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end());      // Perform PHI construction. -    Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); -    LI->replaceAllUsesWith(v); -     -    if (isa<PHINode>(v)) -      v->takeName(LI); -    if (isa<PointerType>(v->getType())) -      MD->invalidateCachedPointerInfo(v); +    Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, +                                      VN.getAliasAnalysis()); +    LI->replaceAllUsesWith(V); + +    if (isa<PHINode>(V)) +      V->takeName(LI); +    if (isa<PointerType>(V->getType())) +      MD->invalidateCachedPointerInfo(V);      toErase.push_back(LI);      NumGVNLoad++;      return true;    } -   +    if (!EnablePRE || !EnableLoadPRE)      return false; @@ -1066,7 +1352,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,    // prefer to not increase code size.  As such, we only do this when we know    // that we only have to insert *one* load (which means we're basically moving    // the load, not inserting a new one). 
-   +    SmallPtrSet<BasicBlock *, 4> Blockers;    for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)      Blockers.insert(UnavailableBlocks[i]); @@ -1090,28 +1376,28 @@ bool GVN::processNonLocalLoad(LoadInst *LI,      if (TmpBB->getTerminator()->getNumSuccessors() != 1)        allSingleSucc = false;    } -   +    assert(TmpBB);    LoadBB = TmpBB; -   +    // If we have a repl set with LI itself in it, this means we have a loop where    // at least one of the values is LI.  Since this means that we won't be able    // to eliminate LI even if we insert uses in the other predecessors, we will    // end up increasing code size.  Reject this by scanning for LI.    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) -    if (ValuesPerBlock[i].second == LI) +    if (ValuesPerBlock[i].V == LI)        return false; -   +    if (isSinglePred) {      bool isHot = false;      for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) -      if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].second)) -	// "Hot" Instruction is in some loop (because it dominates its dep.  -	// instruction). -	if (DT->dominates(LI, I)) {  -	  isHot = true; -	  break; -	} +      if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V)) +        // "Hot" Instruction is in some loop (because it dominates its dep. +        // instruction). +        if (DT->dominates(LI, I)) { +          isHot = true; +          break; +        }      // We are interested only in "hot" instructions. We don't want to do any      // mis-optimizations here. 
@@ -1128,7 +1414,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,    DenseMap<BasicBlock*, char> FullyAvailableBlocks;    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) -    FullyAvailableBlocks[ValuesPerBlock[i].first] = true; +    FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;    for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)      FullyAvailableBlocks[UnavailableBlocks[i]] = false; @@ -1136,33 +1422,33 @@ bool GVN::processNonLocalLoad(LoadInst *LI,         PI != E; ++PI) {      if (IsValueFullyAvailableInBlock(*PI, FullyAvailableBlocks))        continue; -     +      // If this load is not available in multiple predecessors, reject it.      if (UnavailablePred && UnavailablePred != *PI)        return false;      UnavailablePred = *PI;    } -   +    assert(UnavailablePred != 0 &&           "Fully available value should be eliminated above!"); -   +    // If the loaded pointer is PHI node defined in this block, do PHI translation    // to get its value in the predecessor.    Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred); -   +    // Make sure the value is live in the predecessor.  If it was defined by a    // non-PHI instruction in this block, we don't know how to recompute it above.    
if (Instruction *LPInst = dyn_cast<Instruction>(LoadPtr))      if (!DT->dominates(LPInst->getParent(), UnavailablePred)) { -      DEBUG(cerr << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " -                 << *LPInst << *LI << "\n"); +      DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " +                   << *LPInst << '\n' << *LI << "\n");        return false;      } -   +    // We don't currently handle critical edges :(    if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { -    DEBUG(cerr << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" -                << UnavailablePred->getName() << "': " << *LI); +    DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" +                 << UnavailablePred->getName() << "': " << *LI << '\n');      return false;    } @@ -1182,28 +1468,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI,    // Okay, we can eliminate this load by inserting a reload in the predecessor    // and using PHI construction to get the value in the other predecessors, do    // it. -  DEBUG(cerr << "GVN REMOVING PRE LOAD: " << *LI); -   +  DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); +    Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,                                  LI->getAlignment(),                                  UnavailablePred->getTerminator()); -   -  SmallPtrSet<Instruction*, 4> &p = phiMap[LI->getPointerOperand()]; -  for (SmallPtrSet<Instruction*, 4>::iterator I = p.begin(), E = p.end(); -       I != E; ++I) -    ValuesPerBlock.push_back(std::make_pair((*I)->getParent(), *I)); -   -  DenseMap<BasicBlock*, Value*> BlockReplValues; -  BlockReplValues.insert(ValuesPerBlock.begin(), ValuesPerBlock.end()); -  BlockReplValues[UnavailablePred] = NewLoad; -   + +  // Add the newly created load. +  ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,NewLoad)); +    // Perform PHI construction. 
-  Value* v = GetValueForBlock(LI->getParent(), LI, BlockReplValues, true); -  LI->replaceAllUsesWith(v); -  if (isa<PHINode>(v)) -    v->takeName(LI); -  if (isa<PointerType>(v->getType())) -    MD->invalidateCachedPointerInfo(v); +  Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, +                                    VN.getAliasAnalysis()); +  LI->replaceAllUsesWith(V); +  if (isa<PHINode>(V)) +    V->takeName(LI); +  if (isa<PointerType>(V->getType())) +    MD->invalidateCachedPointerInfo(V);    toErase.push_back(LI);    NumPRELoad++;    return true; @@ -1214,64 +1495,119 @@ bool GVN::processNonLocalLoad(LoadInst *LI,  bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {    if (L->isVolatile())      return false; -   -  Value* pointer = L->getPointerOperand();    // ... to a pointer that has been loaded from before... -  MemDepResult dep = MD->getDependency(L); -   +  MemDepResult Dep = MD->getDependency(L); +    // If the value isn't available, don't do anything! -  if (dep.isClobber()) { +  if (Dep.isClobber()) { +    // FIXME: We should handle memset/memcpy/memmove as dependent instructions +    // to forward the value if available. +    //if (isa<MemIntrinsic>(Dep.getInst())) +    //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n"; +     +    // Check to see if we have something like this: +    //   store i32 123, i32* %P +    //   %A = bitcast i32* %P to i8* +    //   %B = gep i8* %A, i32 1 +    //   %C = load i8* %B +    // +    // We could do that by recognizing if the clobber instructions are obviously +    // a common base + constant offset, and if the previous store (or memset) +    // completely covers this load.  This sort of thing can happen in bitfield +    // access code. 
+    if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) +      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) { +        int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD); +        if (Offset != -1) { +          Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset, +                                                 L->getType(), L, *TD); +          DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n' +                       << *AvailVal << '\n' << *L << "\n\n\n"); +     +          // Replace the load! +          L->replaceAllUsesWith(AvailVal); +          if (isa<PointerType>(AvailVal->getType())) +            MD->invalidateCachedPointerInfo(AvailVal); +          toErase.push_back(L); +          NumGVNLoad++; +          return true; +        } +      } +          DEBUG(        // fast print dep, using operator<< on instruction would be too slow -      DOUT << "GVN: load "; -      WriteAsOperand(*DOUT.stream(), L); -      Instruction *I = dep.getInst(); -      DOUT << " is clobbered by " << *I; +      errs() << "GVN: load "; +      WriteAsOperand(errs(), L); +      Instruction *I = Dep.getInst(); +      errs() << " is clobbered by " << *I << '\n';      );      return false;    }    // If it is defined in another block, try harder. -  if (dep.isNonLocal()) +  if (Dep.isNonLocal())      return processNonLocalLoad(L, toErase); -  Instruction *DepInst = dep.getInst(); +  Instruction *DepInst = Dep.getInst();    if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) { -    // Only forward substitute stores to loads of the same type. -    // FIXME: Could do better! -    if (DepSI->getPointerOperand()->getType() != pointer->getType()) -      return false; +    Value *StoredVal = DepSI->getOperand(0); +    // The store and load are to a must-aliased pointer, but they may not +    // actually have the same type.  See if we know how to reuse the stored +    // value (depending on its type). 
+    const TargetData *TD = 0; +    if (StoredVal->getType() != L->getType() && +        (TD = getAnalysisIfAvailable<TargetData>())) { +      StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), +                                                 L, *TD); +      if (StoredVal == 0) +        return false; +       +      DEBUG(errs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal +                   << '\n' << *L << "\n\n\n"); +    } +      // Remove it! -    L->replaceAllUsesWith(DepSI->getOperand(0)); -    if (isa<PointerType>(DepSI->getOperand(0)->getType())) -      MD->invalidateCachedPointerInfo(DepSI->getOperand(0)); +    L->replaceAllUsesWith(StoredVal); +    if (isa<PointerType>(StoredVal->getType())) +      MD->invalidateCachedPointerInfo(StoredVal);      toErase.push_back(L);      NumGVNLoad++;      return true;    }    if (LoadInst *DepLI = dyn_cast<LoadInst>(DepInst)) { -    // Only forward substitute stores to loads of the same type. -    // FIXME: Could do better! load i32 -> load i8 -> truncate on little endian. -    if (DepLI->getType() != L->getType()) -      return false; +    Value *AvailableVal = DepLI; +     +    // The loads are of a must-aliased pointer, but they may not actually have +    // the same type.  See if we know how to reuse the previously loaded value +    // (depending on its type). +    const TargetData *TD = 0; +    if (DepLI->getType() != L->getType() && +        (TD = getAnalysisIfAvailable<TargetData>())) { +      AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); +      if (AvailableVal == 0) +        return false; +       +      DEBUG(errs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal +                   << "\n" << *L << "\n\n\n"); +    }      // Remove it! 
-    L->replaceAllUsesWith(DepLI); +    L->replaceAllUsesWith(AvailableVal);      if (isa<PointerType>(DepLI->getType()))        MD->invalidateCachedPointerInfo(DepLI);      toErase.push_back(L);      NumGVNLoad++;      return true;    } -   +    // If this load really doesn't depend on anything, then we must be loading an    // undef value.  This can happen when loading for a fresh allocation with no    // intervening stores, for example. -  if (isa<AllocationInst>(DepInst)) { -    L->replaceAllUsesWith(Context->getUndef(L->getType())); +  if (isa<AllocationInst>(DepInst) || isMalloc(DepInst)) { +    L->replaceAllUsesWith(UndefValue::get(L->getType()));      toErase.push_back(L);      NumGVNLoad++;      return true; @@ -1280,150 +1616,93 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {    return false;  } -Value* GVN::lookupNumber(BasicBlock* BB, uint32_t num) { +Value *GVN::lookupNumber(BasicBlock *BB, uint32_t num) {    DenseMap<BasicBlock*, ValueNumberScope*>::iterator I = localAvail.find(BB);    if (I == localAvail.end())      return 0; -   -  ValueNumberScope* locals = I->second; -   -  while (locals) { -    DenseMap<uint32_t, Value*>::iterator I = locals->table.find(num); -    if (I != locals->table.end()) + +  ValueNumberScope *Locals = I->second; +  while (Locals) { +    DenseMap<uint32_t, Value*>::iterator I = Locals->table.find(num); +    if (I != Locals->table.end())        return I->second; -    else -      locals = locals->parent; +    Locals = Locals->parent;    } -   +    return 0;  } -/// AttemptRedundancyElimination - If the "fast path" of redundancy elimination -/// by inheritance from the dominator fails, see if we can perform phi  -/// construction to eliminate the redundancy. 
-Value* GVN::AttemptRedundancyElimination(Instruction* orig, unsigned valno) { -  BasicBlock* BaseBlock = orig->getParent(); -   -  SmallPtrSet<BasicBlock*, 4> Visited; -  SmallVector<BasicBlock*, 8> Stack; -  Stack.push_back(BaseBlock); -   -  DenseMap<BasicBlock*, Value*> Results; -   -  // Walk backwards through our predecessors, looking for instances of the -  // value number we're looking for.  Instances are recorded in the Results -  // map, which is then used to perform phi construction. -  while (!Stack.empty()) { -    BasicBlock* Current = Stack.back(); -    Stack.pop_back(); -     -    // If we've walked all the way to a proper dominator, then give up. Cases -    // where the instance is in the dominator will have been caught by the fast -    // path, and any cases that require phi construction further than this are -    // probably not worth it anyways.  Note that this is a SIGNIFICANT compile -    // time improvement. -    if (DT->properlyDominates(Current, orig->getParent())) return 0; -     -    DenseMap<BasicBlock*, ValueNumberScope*>::iterator LA = -                                                       localAvail.find(Current); -    if (LA == localAvail.end()) return 0; -    DenseMap<uint32_t, Value*>::iterator V = LA->second->table.find(valno); -     -    if (V != LA->second->table.end()) { -      // Found an instance, record it. -      Results.insert(std::make_pair(Current, V->second)); -      continue; -    } -     -    // If we reach the beginning of the function, then give up. -    if (pred_begin(Current) == pred_end(Current)) -      return 0; -     -    for (pred_iterator PI = pred_begin(Current), PE = pred_end(Current); -         PI != PE; ++PI) -      if (Visited.insert(*PI)) -        Stack.push_back(*PI); -  } -   -  // If we didn't find instances, give up.  Otherwise, perform phi construction. 
-  if (Results.size() == 0) -    return 0; -  else -    return GetValueForBlock(BaseBlock, orig, Results, true); -}  /// processInstruction - When calculating availability, handle an instruction  /// by inserting it into the appropriate sets  bool GVN::processInstruction(Instruction *I,                               SmallVectorImpl<Instruction*> &toErase) { -  if (LoadInst* L = dyn_cast<LoadInst>(I)) { -    bool changed = processLoad(L, toErase); -     -    if (!changed) { -      unsigned num = VN.lookup_or_add(L); -      localAvail[I->getParent()]->table.insert(std::make_pair(num, L)); +  if (LoadInst *LI = dyn_cast<LoadInst>(I)) { +    bool Changed = processLoad(LI, toErase); + +    if (!Changed) { +      unsigned Num = VN.lookup_or_add(LI); +      localAvail[I->getParent()]->table.insert(std::make_pair(Num, LI));      } -     -    return changed; + +    return Changed;    } -   -  uint32_t nextNum = VN.getNextUnusedValueNumber(); -  unsigned num = VN.lookup_or_add(I); -   -  if (BranchInst* BI = dyn_cast<BranchInst>(I)) { -    localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); -     + +  uint32_t NextNum = VN.getNextUnusedValueNumber(); +  unsigned Num = VN.lookup_or_add(I); + +  if (BranchInst *BI = dyn_cast<BranchInst>(I)) { +    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); +      if (!BI->isConditional() || isa<Constant>(BI->getCondition()))        return false; -     -    Value* branchCond = BI->getCondition(); -    uint32_t condVN = VN.lookup_or_add(branchCond); -     -    BasicBlock* trueSucc = BI->getSuccessor(0); -    BasicBlock* falseSucc = BI->getSuccessor(1); -     -    if (trueSucc->getSinglePredecessor()) -      localAvail[trueSucc]->table[condVN] = Context->getConstantIntTrue(); -    if (falseSucc->getSinglePredecessor()) -      localAvail[falseSucc]->table[condVN] = Context->getConstantIntFalse(); + +    Value *BranchCond = BI->getCondition(); +    uint32_t CondVN = VN.lookup_or_add(BranchCond); + +    
BasicBlock *TrueSucc = BI->getSuccessor(0); +    BasicBlock *FalseSucc = BI->getSuccessor(1); + +    if (TrueSucc->getSinglePredecessor()) +      localAvail[TrueSucc]->table[CondVN] = +        ConstantInt::getTrue(TrueSucc->getContext()); +    if (FalseSucc->getSinglePredecessor()) +      localAvail[FalseSucc]->table[CondVN] = +        ConstantInt::getFalse(TrueSucc->getContext());      return false; -     +    // Allocations are always uniquely numbered, so we can save time and memory -  // by fast failing them.   +  // by fast failing them.    } else if (isa<AllocationInst>(I) || isa<TerminatorInst>(I)) { -    localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); +    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));      return false;    } -   +    // Collapse PHI nodes    if (PHINode* p = dyn_cast<PHINode>(I)) { -    Value* constVal = CollapsePhi(p); -     +    Value *constVal = CollapsePhi(p); +      if (constVal) { -      for (PhiMapType::iterator PI = phiMap.begin(), PE = phiMap.end(); -           PI != PE; ++PI) -        PI->second.erase(p); -                p->replaceAllUsesWith(constVal);        if (isa<PointerType>(constVal->getType()))          MD->invalidateCachedPointerInfo(constVal);        VN.erase(p); -       +        toErase.push_back(p);      } else { -      localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); +      localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));      } -   +    // If the number we were assigned was a brand new VN, then we don't    // need to do a lookup to see if the number already exists    // somewhere in the domtree: it can't! -  } else if (num == nextNum) { -    localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); -     +  } else if (Num == NextNum) { +    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I)); +    // Perform fast-path value-number based elimination of values inherited from    // dominators. 
-  } else if (Value* repl = lookupNumber(I->getParent(), num)) { +  } else if (Value *repl = lookupNumber(I->getParent(), Num)) {      // Remove it!      VN.erase(I);      I->replaceAllUsesWith(repl); @@ -1432,21 +1711,10 @@ bool GVN::processInstruction(Instruction *I,      toErase.push_back(I);      return true; -#if 0 -  // Perform slow-pathvalue-number based elimination with phi construction. -  } else if (Value* repl = AttemptRedundancyElimination(I, num)) { -    // Remove it! -    VN.erase(I); -    I->replaceAllUsesWith(repl); -    if (isa<PointerType>(repl->getType())) -      MD->invalidateCachedPointerInfo(repl); -    toErase.push_back(I); -    return true; -#endif    } else { -    localAvail[I->getParent()]->table.insert(std::make_pair(num, I)); +    localAvail[I->getParent()]->table.insert(std::make_pair(Num, I));    } -   +    return false;  } @@ -1457,35 +1725,35 @@ bool GVN::runOnFunction(Function& F) {    VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());    VN.setMemDep(MD);    VN.setDomTree(DT); -   -  bool changed = false; -  bool shouldContinue = true; -   + +  bool Changed = false; +  bool ShouldContinue = true; +    // Merge unconditional branches, allowing PRE to catch more    // optimization opportunities.    
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { -    BasicBlock* BB = FI; +    BasicBlock *BB = FI;      ++FI;      bool removedBlock = MergeBlockIntoPredecessor(BB, this);      if (removedBlock) NumGVNBlocks++; -     -    changed |= removedBlock; + +    Changed |= removedBlock;    } -   +    unsigned Iteration = 0; -   -  while (shouldContinue) { -    DEBUG(cerr << "GVN iteration: " << Iteration << "\n"); -    shouldContinue = iterateOnFunction(F); -    changed |= shouldContinue; + +  while (ShouldContinue) { +    DEBUG(errs() << "GVN iteration: " << Iteration << "\n"); +    ShouldContinue = iterateOnFunction(F); +    Changed |= ShouldContinue;      ++Iteration;    } -   +    if (EnablePRE) {      bool PREChanged = true;      while (PREChanged) {        PREChanged = performPRE(F); -      changed |= PREChanged; +      Changed |= PREChanged;      }    }    // FIXME: Should perform GVN again after PRE does something.  PRE can move @@ -1495,27 +1763,27 @@ bool GVN::runOnFunction(Function& F) {    cleanupGlobalSets(); -  return changed; +  return Changed;  } -bool GVN::processBlock(BasicBlock* BB) { +bool GVN::processBlock(BasicBlock *BB) {    // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and    // incrementing BI before processing an instruction).    SmallVector<Instruction*, 8> toErase; -  bool changed_function = false; -   +  bool ChangedFunction = false; +    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();         BI != BE;) { -    changed_function |= processInstruction(BI, toErase); +    ChangedFunction |= processInstruction(BI, toErase);      if (toErase.empty()) {        ++BI;        continue;      } -     +      // If we need some instructions deleted, do it now.      NumGVNInstr += toErase.size(); -     +      // Avoid iterator invalidation.      
bool AtStart = BI == BB->begin();      if (!AtStart) @@ -1523,7 +1791,7 @@ bool GVN::processBlock(BasicBlock* BB) {      for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),           E = toErase.end(); I != E; ++I) { -      DEBUG(cerr << "GVN removed: " << **I); +      DEBUG(errs() << "GVN removed: " << **I << '\n');        MD->removeInstruction(*I);        (*I)->eraseFromParent();        DEBUG(verifyRemoved(*I)); @@ -1535,8 +1803,8 @@ bool GVN::processBlock(BasicBlock* BB) {      else        ++BI;    } -   -  return changed_function; + +  return ChangedFunction;  }  /// performPRE - Perform a purely local form of PRE that looks for diamond @@ -1547,32 +1815,33 @@ bool GVN::performPRE(Function& F) {    DenseMap<BasicBlock*, Value*> predMap;    for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),         DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { -    BasicBlock* CurrentBlock = *DI; -     +    BasicBlock *CurrentBlock = *DI; +      // Nothing to PRE in the entry block.      if (CurrentBlock == &F.getEntryBlock()) continue; -     +      for (BasicBlock::iterator BI = CurrentBlock->begin(),           BE = CurrentBlock->end(); BI != BE; ) {        Instruction *CurInst = BI++; -      if (isa<AllocationInst>(CurInst) || isa<TerminatorInst>(CurInst) || -          isa<PHINode>(CurInst) || (CurInst->getType() == Type::VoidTy) || +      if (isa<AllocationInst>(CurInst) || +          isa<TerminatorInst>(CurInst) || isa<PHINode>(CurInst) || +          CurInst->getType()->isVoidTy() ||            CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||            isa<DbgInfoIntrinsic>(CurInst))          continue; -      uint32_t valno = VN.lookup(CurInst); -       +      uint32_t ValNo = VN.lookup(CurInst); +        // Look for the predecessors for PRE opportunities.  We're        // only trying to solve the basic diamond case, where        // a value is computed in the successor and one predecessor,        // but not the other.  
We also explicitly disallow cases        // where the successor is its own predecessor, because they're        // more complicated to get right. -      unsigned numWith = 0; -      unsigned numWithout = 0; -      BasicBlock* PREPred = 0; +      unsigned NumWith = 0; +      unsigned NumWithout = 0; +      BasicBlock *PREPred = 0;        predMap.clear();        for (pred_iterator PI = pred_begin(CurrentBlock), @@ -1581,59 +1850,59 @@ bool GVN::performPRE(Function& F) {          // own predecessor, on in blocks with predecessors          // that are not reachable.          if (*PI == CurrentBlock) { -          numWithout = 2; +          NumWithout = 2;            break;          } else if (!localAvail.count(*PI))  { -          numWithout = 2; +          NumWithout = 2;            break;          } -         -        DenseMap<uint32_t, Value*>::iterator predV =  -                                            localAvail[*PI]->table.find(valno); + +        DenseMap<uint32_t, Value*>::iterator predV = +                                            localAvail[*PI]->table.find(ValNo);          if (predV == localAvail[*PI]->table.end()) {            PREPred = *PI; -          numWithout++; +          NumWithout++;          } else if (predV->second == CurInst) { -          numWithout = 2; +          NumWithout = 2;          } else {            predMap[*PI] = predV->second; -          numWith++; +          NumWith++;          }        } -       +        // Don't do PRE when it might increase code size, i.e. when        // we would need to insert instructions in more than one pred. -      if (numWithout != 1 || numWith == 0) +      if (NumWithout != 1 || NumWith == 0)          continue; -       +        // We can't do PRE safely on a critical edge, so instead we schedule        // the edge to be split and perform the PRE the next time we iterate        // on the function. 
-      unsigned succNum = 0; +      unsigned SuccNum = 0;        for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();             i != e; ++i)          if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) { -          succNum = i; +          SuccNum = i;            break;          } -         -      if (isCriticalEdge(PREPred->getTerminator(), succNum)) { -        toSplit.push_back(std::make_pair(PREPred->getTerminator(), succNum)); + +      if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { +        toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));          continue;        } -       +        // Instantiate the expression the in predecessor that lacked it.        // Because we are going top-down through the block, all value numbers        // will be available in the predecessor by the time we need them.  Any        // that weren't original present will have been instantiated earlier        // in this loop. -      Instruction* PREInstr = CurInst->clone(); +      Instruction *PREInstr = CurInst->clone();        bool success = true;        for (unsigned i = 0, e = CurInst->getNumOperands(); i != e; ++i) {          Value *Op = PREInstr->getOperand(i);          if (isa<Argument>(Op) || isa<Constant>(Op) || isa<GlobalValue>(Op))            continue; -         +          if (Value *V = lookupNumber(PREPred, VN.lookup(Op))) {            PREInstr->setOperand(i, V);          } else { @@ -1641,25 +1910,25 @@ bool GVN::performPRE(Function& F) {            break;          }        } -       +        // Fail out if we encounter an operand that is not available in -      // the PRE predecessor.  This is typically because of loads which  +      // the PRE predecessor.  This is typically because of loads which        // are not value numbered precisely.        
if (!success) {          delete PREInstr;          DEBUG(verifyRemoved(PREInstr));          continue;        } -       +        PREInstr->insertBefore(PREPred->getTerminator());        PREInstr->setName(CurInst->getName() + ".pre");        predMap[PREPred] = PREInstr; -      VN.add(PREInstr, valno); +      VN.add(PREInstr, ValNo);        NumGVNPRE++; -       +        // Update the availability map to include the new instruction. -      localAvail[PREPred]->table.insert(std::make_pair(valno, PREInstr)); -       +      localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr)); +        // Create a PHI to make the value available in this block.        PHINode* Phi = PHINode::Create(CurInst->getType(),                                       CurInst->getName() + ".pre-phi", @@ -1667,27 +1936,27 @@ bool GVN::performPRE(Function& F) {        for (pred_iterator PI = pred_begin(CurrentBlock),             PE = pred_end(CurrentBlock); PI != PE; ++PI)          Phi->addIncoming(predMap[*PI], *PI); -       -      VN.add(Phi, valno); -      localAvail[CurrentBlock]->table[valno] = Phi; -       + +      VN.add(Phi, ValNo); +      localAvail[CurrentBlock]->table[ValNo] = Phi; +        CurInst->replaceAllUsesWith(Phi);        if (isa<PointerType>(Phi->getType()))          MD->invalidateCachedPointerInfo(Phi);        VN.erase(CurInst); -       -      DEBUG(cerr << "GVN PRE removed: " << *CurInst); + +      DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');        MD->removeInstruction(CurInst);        CurInst->eraseFromParent();        DEBUG(verifyRemoved(CurInst));        Changed = true;      }    } -   +    for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator         I = toSplit.begin(), E = toSplit.end(); I != E; ++I)      SplitCriticalEdge(I->first, I->second, this); -   +    return Changed || toSplit.size();  } @@ -1705,25 +1974,24 @@ bool GVN::iterateOnFunction(Function &F) {    }    // Top-down walk of the dominator tree -  bool changed = false; + 
 bool Changed = false;  #if 0    // Needed for value numbering with phi construction to work.    ReversePostOrderTraversal<Function*> RPOT(&F);    for (ReversePostOrderTraversal<Function*>::rpo_iterator RI = RPOT.begin(),         RE = RPOT.end(); RI != RE; ++RI) -    changed |= processBlock(*RI); +    Changed |= processBlock(*RI);  #else    for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),         DE = df_end(DT->getRootNode()); DI != DE; ++DI) -    changed |= processBlock(DI->getBlock()); +    Changed |= processBlock(DI->getBlock());  #endif -  return changed; +  return Changed;  }  void GVN::cleanupGlobalSets() {    VN.clear(); -  phiMap.clear();    for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator         I = localAvail.begin(), E = localAvail.end(); I != E; ++I) @@ -1736,18 +2004,6 @@ void GVN::cleanupGlobalSets() {  void GVN::verifyRemoved(const Instruction *Inst) const {    VN.verifyRemoved(Inst); -  // Walk through the PHI map to make sure the instruction isn't hiding in there -  // somewhere. -  for (PhiMapType::iterator -         I = phiMap.begin(), E = phiMap.end(); I != E; ++I) { -    assert(I->first != Inst && "Inst is still a key in PHI map!"); - -    for (SmallPtrSet<Instruction*, 4>::iterator -           II = I->second.begin(), IE = I->second.end(); II != IE; ++II) { -      assert(*II != Inst && "Inst is still a value in PHI map!"); -    } -  } -    // Walk through the value number scope to make sure the instruction isn't    // ferreted away in it.    
for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 88cf60ecbaa8..e2d9e0b9ec4a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -51,11 +51,11 @@  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/LoopPass.h"  #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Support/CommandLine.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/STLExtras.h" @@ -67,7 +67,7 @@ STATISTIC(NumReplaced, "Number of exit values replaced");  STATISTIC(NumLFTR    , "Number of loop exit tests replaced");  namespace { -  class VISIBILITY_HIDDEN IndVarSimplify : public LoopPass { +  class IndVarSimplify : public LoopPass {      IVUsers         *IU;      LoopInfo        *LI;      ScalarEvolution *SE; @@ -75,30 +75,30 @@ namespace {      bool Changed;    public: -   static char ID; // Pass identification, replacement for typeid -   IndVarSimplify() : LoopPass(&ID) {} - -   virtual bool runOnLoop(Loop *L, LPPassManager &LPM); - -   virtual void getAnalysisUsage(AnalysisUsage &AU) const { -     AU.addRequired<DominatorTree>(); -     AU.addRequired<ScalarEvolution>(); -     AU.addRequiredID(LoopSimplifyID); -     AU.addRequired<LoopInfo>(); -     AU.addRequired<IVUsers>(); -     AU.addRequiredID(LCSSAID); -     AU.addPreserved<ScalarEvolution>(); -     AU.addPreservedID(LoopSimplifyID); -     AU.addPreserved<IVUsers>(); -     AU.addPreservedID(LCSSAID); -     AU.setPreservesCFG(); -   } +    static char ID; // Pass identification, replacement for typeid +    IndVarSimplify() : LoopPass(&ID) {} + +    virtual bool runOnLoop(Loop *L, LPPassManager &LPM); + +   
 virtual void getAnalysisUsage(AnalysisUsage &AU) const { +      AU.addRequired<DominatorTree>(); +      AU.addRequired<LoopInfo>(); +      AU.addRequired<ScalarEvolution>(); +      AU.addRequiredID(LoopSimplifyID); +      AU.addRequiredID(LCSSAID); +      AU.addRequired<IVUsers>(); +      AU.addPreserved<ScalarEvolution>(); +      AU.addPreservedID(LoopSimplifyID); +      AU.addPreservedID(LCSSAID); +      AU.addPreserved<IVUsers>(); +      AU.setPreservesCFG(); +    }    private:      void RewriteNonIntegerIVs(Loop *L); -    ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV* BackedgeTakenCount, +    ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,                                     Value *IndVar,                                     BasicBlock *ExitingBlock,                                     BranchInst *BI, @@ -129,7 +129,7 @@ Pass *llvm::createIndVarSimplifyPass() {  /// SCEV analysis can determine a loop-invariant trip count of the loop, which  /// is actually a much broader range than just linear tests.  ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, -                                   const SCEV* BackedgeTakenCount, +                                   const SCEV *BackedgeTakenCount,                                     Value *IndVar,                                     BasicBlock *ExitingBlock,                                     BranchInst *BI, @@ -138,13 +138,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,    // against the preincremented value, otherwise we prefer to compare against    // the post-incremented value.    Value *CmpIndVar; -  const SCEV* RHS = BackedgeTakenCount; +  const SCEV *RHS = BackedgeTakenCount;    if (ExitingBlock == L->getLoopLatch()) {      // Add one to the "backedge-taken" count to get the trip count.      // If this addition may overflow, we have to be more pessimistic and      // cast the induction variable before doing the add. 
-    const SCEV* Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); -    const SCEV* N = +    const SCEV *Zero = SE->getIntegerSCEV(0, BackedgeTakenCount->getType()); +    const SCEV *N =        SE->getAddExpr(BackedgeTakenCount,                       SE->getIntegerSCEV(1, BackedgeTakenCount->getType()));      if ((isa<SCEVConstant>(N) && !N->isZero()) || @@ -182,13 +182,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,    else      Opcode = ICmpInst::ICMP_EQ; -  DOUT << "INDVARS: Rewriting loop exit condition to:\n" -       << "      LHS:" << *CmpIndVar // includes a newline -       << "       op:\t" -       << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" -       << "      RHS:\t" << *RHS << "\n"; +  DEBUG(errs() << "INDVARS: Rewriting loop exit condition to:\n" +               << "      LHS:" << *CmpIndVar << '\n' +               << "       op:\t" +               << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n" +               << "      RHS:\t" << *RHS << "\n"); -  ICmpInst *Cond = new ICmpInst(Opcode, CmpIndVar, ExitCnt, "exitcond", BI); +  ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");    Instruction *OrigCond = cast<Instruction>(BI->getCondition());    // It's tempting to use replaceAllUsesWith here to fully replace the old @@ -264,7 +264,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,          // Okay, this instruction has a user outside of the current loop          // and varies predictably *inside* the loop.  Evaluate the value it          // contains when the loop exits, if possible. 
-        const SCEV* ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop()); +        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());          if (!ExitValue->isLoopInvariant(L))            continue; @@ -273,25 +273,23 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,          Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); -        DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal -             << "  LoopVal = " << *Inst << "\n"; +        DEBUG(errs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' +                     << "  LoopVal = " << *Inst << "\n");          PN->setIncomingValue(i, ExitVal);          // If this instruction is dead now, delete it.          RecursivelyDeleteTriviallyDeadInstructions(Inst); -        // If we're inserting code into the exit block rather than the -        // preheader, we can (and have to) remove the PHI entirely. -        // This is safe, because the NewVal won't be variant -        // in the loop, so we don't need an LCSSA phi node anymore. -        if (ExitBlocks.size() == 1) { +        if (NumPreds == 1) { +          // Completely replace a single-pred PHI. This is safe, because the +          // NewVal won't be variant in the loop, so we don't need an LCSSA phi +          // node anymore.            PN->replaceAllUsesWith(ExitVal);            RecursivelyDeleteTriviallyDeadInstructions(PN); -          break;          }        } -      if (ExitBlocks.size() != 1) { +      if (NumPreds != 1) {          // Clone the PHI and delete the original one. This lets IVUsers and          // any other maps purge the original user from their records.          
PHINode *NewPN = PN->clone(); @@ -339,7 +337,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {    RewriteNonIntegerIVs(L);    BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null -  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); +  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);    // Create a rewriter object which we'll use to transform the code with.    SCEVExpander Rewriter(*SE); @@ -367,14 +365,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {        NeedCannIV = true;    }    for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { -    const SCEV* Stride = IU->StrideOrder[i]; +    const SCEV *Stride = IU->StrideOrder[i];      const Type *Ty = SE->getEffectiveSCEVType(Stride->getType());      if (!LargestType ||          SE->getTypeSizeInBits(Ty) >            SE->getTypeSizeInBits(LargestType))        LargestType = Ty; -    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = +    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =        IU->IVUsesByStride.find(IU->StrideOrder[i]);      assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); @@ -403,7 +401,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {      ++NumInserted;      Changed = true; -    DOUT << "INDVARS: New CanIV: " << *IndVar; +    DEBUG(errs() << "INDVARS: New CanIV: " << *IndVar << '\n');      // Now that the official induction variable is established, reinsert      // the old canonical-looking variable after it so that the IR remains @@ -458,9 +456,9 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,    // the need for the code evaluation methods to insert induction variables    // of different sizes.    
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { -    const SCEV* Stride = IU->StrideOrder[i]; +    const SCEV *Stride = IU->StrideOrder[i]; -    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = +    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =        IU->IVUsesByStride.find(IU->StrideOrder[i]);      assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");      ilist<IVStrideUse> &List = SI->second->Users; @@ -471,7 +469,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,        Instruction *User = UI->getUser();        // Compute the final addrec to expand into code. -      const SCEV* AR = IU->getReplacementExpr(*UI); +      const SCEV *AR = IU->getReplacementExpr(*UI);        // FIXME: It is an extremely bad idea to indvar substitute anything more        // complex than affine induction variables.  Doing so will put expensive @@ -508,8 +506,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,          NewVal->takeName(Op);        User->replaceUsesOfWith(Op, NewVal);        UI->setOperandValToReplace(NewVal); -      DOUT << "INDVARS: Rewrote IV '" << *AR << "' " << *Op -           << "   into = " << *NewVal << "\n"; +      DEBUG(errs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' +                   << "   into = " << *NewVal << "\n");        ++NumRemoved;        Changed = true; @@ -546,8 +544,19 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {      // New instructions were inserted at the end of the preheader.      if (isa<PHINode>(I))        break; -    if (I->isTrapping()) +    // Don't move instructions which might have side effects, since the side +    // effects need to complete before instructions inside the loop.  Also +    // don't move instructions which might read memory, since the loop may +    // modify memory. 
Note that it's okay if the instruction might have +    // undefined behavior: LoopSimplify guarantees that the preheader +    // dominates the exit block. +    if (I->mayHaveSideEffects() || I->mayReadFromMemory())        continue; +    // Don't sink static AllocaInsts out of the entry block, which would +    // turn them into dynamic allocas! +    if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) +      if (AI->isStaticAlloca()) +        continue;      // Determine if there is a use in or before the loop (direct or      // otherwise).      bool UsedInLoop = false; @@ -630,7 +639,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {    // Check incoming value.    ConstantFP *InitValue = dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge));    if (!InitValue) return; -  uint64_t newInitValue = Type::Int32Ty->getPrimitiveSizeInBits(); +  uint64_t newInitValue = +              Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();    if (!convertToInt(InitValue->getValueAPF(), &newInitValue))      return; @@ -646,7 +656,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {      IncrVIndex = 0;    IncrValue = dyn_cast<ConstantFP>(Incr->getOperand(IncrVIndex));    if (!IncrValue) return; -  uint64_t newIncrValue = Type::Int32Ty->getPrimitiveSizeInBits(); +  uint64_t newIncrValue = +              Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();    if (!convertToInt(IncrValue->getValueAPF(), &newIncrValue))      return; @@ -677,7 +688,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {      EVIndex = 0;    EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex));    if (!EV) return; -  uint64_t intEV = Type::Int32Ty->getPrimitiveSizeInBits(); +  uint64_t intEV = Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();    if (!convertToInt(EV->getValueAPF(), &intEV))      return; @@ -710,24 +721,26 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {    if (NewPred == 
CmpInst::BAD_ICMP_PREDICATE) return;    // Insert new integer induction variable. -  PHINode *NewPHI = PHINode::Create(Type::Int32Ty, +  PHINode *NewPHI = PHINode::Create(Type::getInt32Ty(PH->getContext()),                                      PH->getName()+".int", PH); -  NewPHI->addIncoming(Context->getConstantInt(Type::Int32Ty, newInitValue), +  NewPHI->addIncoming(ConstantInt::get(Type::getInt32Ty(PH->getContext()), +                                       newInitValue),                        PH->getIncomingBlock(IncomingEdge));    Value *NewAdd = BinaryOperator::CreateAdd(NewPHI, -                                          Context->getConstantInt(Type::Int32Ty, +                           ConstantInt::get(Type::getInt32Ty(PH->getContext()),                                                               newIncrValue),                                              Incr->getName()+".int", Incr);    NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge));    // The back edge is edge 1 of newPHI, whatever it may have been in the    // original PHI. -  ConstantInt *NewEV = Context->getConstantInt(Type::Int32Ty, intEV); +  ConstantInt *NewEV = ConstantInt::get(Type::getInt32Ty(PH->getContext()), +                                        intEV);    Value *LHS = (EVIndex == 1 ? NewPHI->getIncomingValue(1) : NewEV);    Value *RHS = (EVIndex == 1 ? NewEV : NewPHI->getIncomingValue(1)); -  ICmpInst *NewEC = new ICmpInst(NewPred, LHS, RHS, EC->getNameStart(), -                                 EC->getParent()->getTerminator()); +  ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(), +                                 NewPred, LHS, RHS, EC->getName());    // In the following deltions, PH may become dead and may be deleted.    // Use a WeakVH to observe whether this happens. 
@@ -739,7 +752,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {    RecursivelyDeleteTriviallyDeadInstructions(EC);    // Delete old, floating point, increment instruction. -  Incr->replaceAllUsesWith(Context->getUndef(Incr->getType())); +  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));    RecursivelyDeleteTriviallyDeadInstructions(Incr);    // Replace floating induction variable, if it isn't already deleted. diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 59fbd396a3a1..7c96c49a34b9 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -40,7 +40,9 @@  #include "llvm/Pass.h"  #include "llvm/DerivedTypes.h"  #include "llvm/GlobalVariable.h" +#include "llvm/Operator.h"  #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/MallocHelper.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -48,11 +50,13 @@  #include "llvm/Support/CallSite.h"  #include "llvm/Support/ConstantRange.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/GetElementPtrTypeIterator.h"  #include "llvm/Support/InstVisitor.h" +#include "llvm/Support/IRBuilder.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/PatternMatch.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/SmallPtrSet.h" @@ -60,7 +64,6 @@  #include "llvm/ADT/STLExtras.h"  #include <algorithm>  #include <climits> -#include <sstream>  using namespace llvm;  using namespace llvm::PatternMatch; @@ -71,29 +74,49 @@ STATISTIC(NumDeadStore, "Number of dead stores eliminated");  STATISTIC(NumSunkInst , "Number of instructions sunk");  namespace { -  class VISIBILITY_HIDDEN InstCombiner -    : public 
FunctionPass, -      public InstVisitor<InstCombiner, Instruction*> { -    // Worklist of all of the instructions that need to be simplified. +  /// InstCombineWorklist - This is the worklist management logic for +  /// InstCombine. +  class InstCombineWorklist {      SmallVector<Instruction*, 256> Worklist;      DenseMap<Instruction*, unsigned> WorklistMap; -    TargetData *TD; -    bool MustPreserveLCSSA; +     +    void operator=(const InstCombineWorklist&RHS);   // DO NOT IMPLEMENT +    InstCombineWorklist(const InstCombineWorklist&); // DO NOT IMPLEMENT    public: -    static char ID; // Pass identification, replacement for typeid -    InstCombiner() : FunctionPass(&ID) {} - -    LLVMContext* getContext() { return Context; } - -    /// AddToWorkList - Add the specified instruction to the worklist if it -    /// isn't already in it. -    void AddToWorkList(Instruction *I) { -      if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) +    InstCombineWorklist() {} +     +    bool isEmpty() const { return Worklist.empty(); } +     +    /// Add - Add the specified instruction to the worklist if it isn't already +    /// in it. +    void Add(Instruction *I) { +      if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) { +        DEBUG(errs() << "IC: ADD: " << *I << '\n');          Worklist.push_back(I); +      } +    } +     +    void AddValue(Value *V) { +      if (Instruction *I = dyn_cast<Instruction>(V)) +        Add(I);      } -    // RemoveFromWorkList - remove I from the worklist if it exists. -    void RemoveFromWorkList(Instruction *I) { +    /// AddInitialGroup - Add the specified batch of stuff in reverse order. +    /// which should only be done when the worklist is empty and when the group +    /// has no duplicates. 
+    void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { +      assert(Worklist.empty() && "Worklist must be empty to add initial group"); +      Worklist.reserve(NumEntries+16); +      DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); +      for (; NumEntries; --NumEntries) { +        Instruction *I = List[NumEntries-1]; +        WorklistMap.insert(std::make_pair(I, Worklist.size())); +        Worklist.push_back(I); +      } +    } +     +    // Remove - remove I from the worklist if it exists. +    void Remove(Instruction *I) {        DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);        if (It == WorklistMap.end()) return; // Not in worklist. @@ -103,51 +126,74 @@ namespace {        WorklistMap.erase(It);      } -    Instruction *RemoveOneFromWorkList() { +    Instruction *RemoveOne() {        Instruction *I = Worklist.back();        Worklist.pop_back();        WorklistMap.erase(I);        return I;      } -          /// AddUsersToWorkList - When an instruction is simplified, add all users of      /// the instruction to the work lists because they might get more simplified      /// now.      /// -    void AddUsersToWorkList(Value &I) { +    void AddUsersToWorkList(Instruction &I) {        for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();             UI != UE; ++UI) -        AddToWorkList(cast<Instruction>(*UI)); -    } - -    /// AddUsesToWorkList - When an instruction is simplified, add operands to -    /// the work lists because they might get more simplified now. -    /// -    void AddUsesToWorkList(Instruction &I) { -      for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) -        if (Instruction *Op = dyn_cast<Instruction>(*i)) -          AddToWorkList(Op); +        Add(cast<Instruction>(*UI));      } -    /// AddSoonDeadInstToWorklist - The specified instruction is about to become -    /// dead.  
Add all of its operands to the worklist, turning them into -    /// undef's to reduce the number of uses of those instructions. -    /// -    /// Return the specified operand before it is turned into an undef. -    /// -    Value *AddSoonDeadInstToWorklist(Instruction &I, unsigned op) { -      Value *R = I.getOperand(op); -       -      for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) -        if (Instruction *Op = dyn_cast<Instruction>(*i)) { -          AddToWorkList(Op); -          // Set the operand to undef to drop the use. -          *i = Context->getUndef(Op->getType()); -        } +     +    /// Zap - check that the worklist is empty and nuke the backing store for +    /// the map if it is large. +    void Zap() { +      assert(WorklistMap.empty() && "Worklist empty, but map not?"); -      return R; +      // Do an explicit clear, this shrinks the map if needed. +      WorklistMap.clear();      } +  }; +} // end anonymous namespace. + + +namespace { +  /// InstCombineIRInserter - This is an IRBuilder insertion helper that works +  /// just like the normal insertion helper, but also adds any new instructions +  /// to the instcombine worklist. +  class InstCombineIRInserter : public IRBuilderDefaultInserter<true> { +    InstCombineWorklist &Worklist; +  public: +    InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {} +     +    void InsertHelper(Instruction *I, const Twine &Name, +                      BasicBlock *BB, BasicBlock::iterator InsertPt) const { +      IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt); +      Worklist.Add(I); +    } +  }; +} // end anonymous namespace + + +namespace { +  class InstCombiner : public FunctionPass, +                       public InstVisitor<InstCombiner, Instruction*> { +    TargetData *TD; +    bool MustPreserveLCSSA; +    bool MadeIRChange; +  public: +    /// Worklist - All of the instructions that need to be simplified. 
+    InstCombineWorklist Worklist; + +    /// Builder - This is an IRBuilder that automatically inserts new +    /// instructions into the worklist when they are created. +    typedef IRBuilder<true, ConstantFolder, InstCombineIRInserter> BuilderTy; +    BuilderTy *Builder; +         +    static char ID; // Pass identification, replacement for typeid +    InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {} + +    LLVMContext *Context; +    LLVMContext *getContext() const { return Context; }    public:      virtual bool runOnFunction(Function &F); @@ -155,12 +201,11 @@ namespace {      bool DoOneIteration(Function &F, unsigned ItNum);      virtual void getAnalysisUsage(AnalysisUsage &AU) const { -      AU.addRequired<TargetData>();        AU.addPreservedID(LCSSAID);        AU.setPreservesCFG();      } -    TargetData &getTargetData() const { return *TD; } +    TargetData *getTargetData() const { return TD; }      // Visitation implementation - Implement instruction combining for different      // instruction types.  
The semantics are as follows: @@ -187,8 +232,10 @@ namespace {      Instruction *visitSDiv(BinaryOperator &I);      Instruction *visitFDiv(BinaryOperator &I);      Instruction *FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); +    Instruction *FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);      Instruction *visitAnd(BinaryOperator &I);      Instruction *FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS); +    Instruction *FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, FCmpInst *RHS);      Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,                                       Value *A, Value *B, Value *C);      Instruction *visitOr (BinaryOperator &I); @@ -208,7 +255,7 @@ namespace {      Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,                                  ConstantInt *DivRHS); -    Instruction *FoldGEPICmp(User *GEPLHS, Value *RHS, +    Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,                               ICmpInst::Predicate Cond, Instruction &I);      Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,                                       BinaryOperator &I); @@ -269,30 +316,10 @@ namespace {               "New instruction already inserted into a basic block!");        BasicBlock *BB = Old.getParent();        BB->getInstList().insert(&Old, New);  // Insert inst -      AddToWorkList(New); +      Worklist.Add(New);        return New;      } - -    /// InsertCastBefore - Insert a cast of V to TY before the instruction POS. -    /// This also adds the cast to the worklist.  Finally, this returns the -    /// cast. 
-    Value *InsertCastBefore(Instruction::CastOps opc, Value *V, const Type *Ty, -                            Instruction &Pos) { -      if (V->getType() == Ty) return V; - -      if (Constant *CV = dyn_cast<Constant>(V)) -        return Context->getConstantExprCast(opc, CV, Ty); -       -      Instruction *C = CastInst::Create(opc, V, Ty, V->getName(), &Pos); -      AddToWorkList(C); -      return C; -    } -    Value *InsertBitCastBefore(Value *V, const Type *Ty, Instruction &Pos) { -      return InsertCastBefore(Instruction::BitCast, V, Ty, Pos); -    } - -      // ReplaceInstUsesWith - This method is to be used when an instruction is      // found to be dead, replacable with another preexisting expression.  Here      // we add all uses of I to the worklist, replace all uses of I with the new @@ -300,16 +327,15 @@ namespace {      // modified.      //      Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { -      AddUsersToWorkList(I);         // Add all modified instrs to worklist -      if (&I != V) { -        I.replaceAllUsesWith(V); -        return &I; -      } else { -        // If we are replacing the instruction with itself, this must be in a -        // segment of unreachable code, so just clobber the instruction. -        I.replaceAllUsesWith(Context->getUndef(I.getType())); -        return &I; -      } +      Worklist.AddUsersToWorkList(I);   // Add all modified instrs to worklist. +       +      // If we are replacing the instruction with itself, this must be in a +      // segment of unreachable code, so just clobber the instruction. +      if (&I == V)  +        V = UndefValue::get(I.getType()); +         +      I.replaceAllUsesWith(V); +      return &I;      }      // EraseInstFromFunction - When dealing with an instruction that has side @@ -317,10 +343,19 @@ namespace {      // instruction.  Instead, visit methods should return the value returned by      // this function.      
Instruction *EraseInstFromFunction(Instruction &I) { +      DEBUG(errs() << "IC: ERASE " << I << '\n'); +        assert(I.use_empty() && "Cannot erase instruction that is used!"); -      AddUsesToWorkList(I); -      RemoveFromWorkList(&I); +      // Make sure that we reprocess all operands now that we reduced their +      // use counts. +      if (I.getNumOperands() < 8) { +        for (User::op_iterator i = I.op_begin(), e = I.op_end(); i != e; ++i) +          if (Instruction *Op = dyn_cast<Instruction>(*i)) +            Worklist.Add(Op); +      } +      Worklist.Remove(&I);        I.eraseFromParent(); +      MadeIRChange = true;        return 0;  // Don't do anything with FI      } @@ -364,10 +399,15 @@ namespace {      Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,                                        APInt& UndefElts, unsigned Depth = 0); -    // FoldOpIntoPhi - Given a binary operator or cast instruction which has a -    // PHI node as operand #0, see if we can fold the instruction into the PHI -    // (which is only possible if all operands to the PHI are constants). -    Instruction *FoldOpIntoPhi(Instruction &I); +    // FoldOpIntoPhi - Given a binary operator, cast instruction, or select +    // which has a PHI node as operand #0, see if we can fold the instruction +    // into the PHI (which is only possible if all operands to the PHI are +    // constants). +    // +    // If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms +    // that would normally be unprofitable because they strongly encourage jump +    // threading. 
+    Instruction *FoldOpIntoPhi(Instruction &I, bool AllowAggressive = false);      // FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"      // operator and they all are only used by the PHI, PHI together their @@ -399,7 +439,7 @@ namespace {                                          unsigned PrefAlign = 0);    }; -} +} // end anonymous namespace  char InstCombiner::ID = 0;  static RegisterPass<InstCombiner> @@ -409,7 +449,8 @@ X("instcombine", "Combine redundant instructions");  //   0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst  static unsigned getComplexity(Value *V) {    if (isa<Instruction>(V)) { -    if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) || +    if (BinaryOperator::isNeg(V) || +        BinaryOperator::isFNeg(V) ||          BinaryOperator::isNot(V))        return 3;      return 4; @@ -429,7 +470,7 @@ static bool isOnlyUse(Value *V) {  static const Type *getPromotedType(const Type *Ty) {    if (const IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {      if (ITy->getBitWidth() < 32) -      return Type::Int32Ty; +      return Type::getInt32Ty(Ty->getContext());    }    return Ty;  } @@ -438,29 +479,12 @@ static const Type *getPromotedType(const Type *Ty) {  /// expression bitcast, or a GetElementPtrInst with all zero indices, return the  /// operand value, otherwise return null.  static Value *getBitCastOperand(Value *V) { -  if (BitCastInst *I = dyn_cast<BitCastInst>(V)) -    // BitCastInst? -    return I->getOperand(0); -  else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { -    // GetElementPtrInst? -    if (GEP->hasAllZeroIndices()) -      return GEP->getOperand(0); -  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { -    if (CE->getOpcode() == Instruction::BitCast) -      // BitCast ConstantExp? -      return CE->getOperand(0); -    else if (CE->getOpcode() == Instruction::GetElementPtr) { -      // GetElementPtr ConstantExp? 
-      for (User::op_iterator I = CE->op_begin() + 1, E = CE->op_end(); -           I != E; ++I) { -        ConstantInt *CI = dyn_cast<ConstantInt>(I); -        if (!CI || !CI->isZero()) -          // Any non-zero indices? Not cast-like. -          return 0; -      } -      // All-zero indices? This is just like casting. -      return CE->getOperand(0); -    } +  if (Operator *O = dyn_cast<Operator>(V)) { +    if (O->getOpcode() == Instruction::BitCast) +      return O->getOperand(0); +    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) +      if (GEP->hasAllZeroIndices()) +        return GEP->getPointerOperand();    }    return 0;  } @@ -474,7 +498,7 @@ isEliminableCastPair(    const Type *DstTy,     ///< The target type for the second cast instruction    TargetData *TD         ///< The target data for pointer size  ) { -   +    const Type *SrcTy = CI->getOperand(0)->getType();   // A from above    const Type *MidTy = CI->getType();                  // B from above @@ -483,12 +507,15 @@ isEliminableCastPair(    Instruction::CastOps secondOp = Instruction::CastOps(opcode);    unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, -                                                DstTy, TD->getIntPtrType()); +                                                DstTy, +                                  TD ? TD->getIntPtrType(CI->getContext()) : 0);    // We don't want to form an inttoptr or ptrtoint that converts to an integer    // type that differs from the pointer size. 
-  if ((Res == Instruction::IntToPtr && SrcTy != TD->getIntPtrType()) || -      (Res == Instruction::PtrToInt && DstTy != TD->getIntPtrType())) +  if ((Res == Instruction::IntToPtr && +          (!TD || SrcTy != TD->getIntPtrType(CI->getContext()))) || +      (Res == Instruction::PtrToInt && +          (!TD || DstTy != TD->getIntPtrType(CI->getContext()))))      Res = 0;    return Instruction::CastOps(Res); @@ -503,7 +530,7 @@ static bool ValueRequiresCast(Instruction::CastOps opcode, const Value *V,    // If this is another cast that can be eliminated, it isn't codegen either.    if (const CastInst *CI = dyn_cast<CastInst>(V)) -    if (isEliminableCastPair(CI, opcode, Ty, TD))  +    if (isEliminableCastPair(CI, opcode, Ty, TD))        return false;    return true;  } @@ -528,7 +555,7 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {    if (BinaryOperator *Op = dyn_cast<BinaryOperator>(I.getOperand(0)))      if (Op->getOpcode() == Opcode && isa<Constant>(Op->getOperand(1))) {        if (isa<Constant>(I.getOperand(1))) { -        Constant *Folded = Context->getConstantExpr(I.getOpcode(), +        Constant *Folded = ConstantExpr::get(I.getOpcode(),                                               cast<Constant>(I.getOperand(1)),                                               cast<Constant>(Op->getOperand(1)));          I.setOperand(0, Op->getOperand(0)); @@ -541,11 +568,11 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {            Constant *C2 = cast<Constant>(Op1->getOperand(1));            // Fold (op (op V1, C1), (op V2, C2)) ==> (op (op V1, V2), (op C1,C2)) -          Constant *Folded = Context->getConstantExpr(I.getOpcode(), C1, C2); +          Constant *Folded = ConstantExpr::get(I.getOpcode(), C1, C2);            Instruction *New = BinaryOperator::Create(Opcode, Op->getOperand(0),                                                      Op1->getOperand(0),                                                      Op1->getName(), &I); -          
AddToWorkList(New); +          Worklist.Add(New);            I.setOperand(0, New);            I.setOperand(1, Folded);            return true; @@ -568,17 +595,17 @@ bool InstCombiner::SimplifyCompare(CmpInst &I) {  // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction  // if the LHS is a constant zero (which is the 'negate' form).  // -static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) { +static inline Value *dyn_castNegVal(Value *V) {    if (BinaryOperator::isNeg(V))      return BinaryOperator::getNegArgument(V);    // Constants can be considered to be negated values if they can be folded.    if (ConstantInt *C = dyn_cast<ConstantInt>(V)) -    return Context->getConstantExprNeg(C); +    return ConstantExpr::getNeg(C);    if (ConstantVector *C = dyn_cast<ConstantVector>(V))      if (C->getType()->getElementType()->isInteger()) -      return Context->getConstantExprNeg(C); +      return ConstantExpr::getNeg(C);    return 0;  } @@ -587,28 +614,28 @@ static inline Value *dyn_castNegVal(Value *V, LLVMContext* Context) {  // instruction if the LHS is a constant negative zero (which is the 'negate'  // form).  // -static inline Value *dyn_castFNegVal(Value *V, LLVMContext* Context) { +static inline Value *dyn_castFNegVal(Value *V) {    if (BinaryOperator::isFNeg(V))      return BinaryOperator::getFNegArgument(V);    // Constants can be considered to be negated values if they can be folded.    
if (ConstantFP *C = dyn_cast<ConstantFP>(V)) -    return Context->getConstantExprFNeg(C); +    return ConstantExpr::getFNeg(C);    if (ConstantVector *C = dyn_cast<ConstantVector>(V))      if (C->getType()->getElementType()->isFloatingPoint()) -      return Context->getConstantExprFNeg(C); +      return ConstantExpr::getFNeg(C);    return 0;  } -static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) { +static inline Value *dyn_castNotVal(Value *V) {    if (BinaryOperator::isNot(V))      return BinaryOperator::getNotArgument(V);    // Constants can be considered to be not'ed values...    if (ConstantInt *C = dyn_cast<ConstantInt>(V)) -    return Context->getConstantInt(~C->getValue()); +    return ConstantInt::get(C->getType(), ~C->getValue());    return 0;  } @@ -617,8 +644,7 @@ static inline Value *dyn_castNotVal(Value *V, LLVMContext* Context) {  // non-constant operand of the multiply, and set CST to point to the multiplier.  // Otherwise, return null.  // -static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST, -                                         LLVMContext* Context) { +static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) {    if (V->hasOneUse() && V->getType()->isInteger())      if (Instruction *I = dyn_cast<Instruction>(V)) {        if (I->getOpcode() == Instruction::Mul) @@ -629,48 +655,27 @@ static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST,            // The multiplier is really 1 << CST.            
uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth();            uint32_t CSTVal = CST->getLimitedValue(BitWidth); -          CST = Context->getConstantInt(APInt(BitWidth, 1).shl(CSTVal)); +          CST = ConstantInt::get(V->getType()->getContext(), +                                 APInt(BitWidth, 1).shl(CSTVal));            return I->getOperand(0);          }      }    return 0;  } -/// dyn_castGetElementPtr - If this is a getelementptr instruction or constant -/// expression, return it. -static User *dyn_castGetElementPtr(Value *V) { -  if (isa<GetElementPtrInst>(V)) return cast<User>(V); -  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) -    if (CE->getOpcode() == Instruction::GetElementPtr) -      return cast<User>(V); -  return false; -} - -/// getOpcode - If this is an Instruction or a ConstantExpr, return the -/// opcode value. Otherwise return UserOp1. -static unsigned getOpcode(const Value *V) { -  if (const Instruction *I = dyn_cast<Instruction>(V)) -    return I->getOpcode(); -  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) -    return CE->getOpcode(); -  // Use UserOp1 to mean there's no opcode. -  return Instruction::UserOp1; -} -  /// AddOne - Add one to a ConstantInt -static Constant *AddOne(Constant *C, LLVMContext* Context) { -  return Context->getConstantExprAdd(C,  -    Context->getConstantInt(C->getType(), 1)); +static Constant *AddOne(Constant *C) { +  return ConstantExpr::getAdd(C,  +    ConstantInt::get(C->getType(), 1));  }  /// SubOne - Subtract one from a ConstantInt -static Constant *SubOne(ConstantInt *C, LLVMContext* Context) { -  return Context->getConstantExprSub(C,  -    Context->getConstantInt(C->getType(), 1)); +static Constant *SubOne(ConstantInt *C) { +  return ConstantExpr::getSub(C,  +    ConstantInt::get(C->getType(), 1));  }  /// MultiplyOverflows - True if the multiply can not be expressed in an int  /// this size. 
-static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign, -                              LLVMContext* Context) { +static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign) {    uint32_t W = C1->getBitWidth();    APInt LHSExt = C1->getValue(), RHSExt = C2->getValue();    if (sign) { @@ -697,7 +702,7 @@ static bool MultiplyOverflows(ConstantInt *C1, ConstantInt *C2, bool sign,  /// are any bits set in the constant that are not demanded.  If so, shrink the  /// constant and return true.  static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,  -                                   APInt Demanded, LLVMContext* Context) { +                                   APInt Demanded) {    assert(I && "No instruction?");    assert(OpNo < I->getNumOperands() && "Operand index too large"); @@ -712,7 +717,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,    // This instruction is producing bits that are not demanded. Shrink the RHS.    Demanded &= OpC->getValue(); -  I->setOperand(OpNo, Context->getConstantInt(Demanded)); +  I->setOperand(OpNo, ConstantInt::get(OpC->getType(), Demanded));    return true;  } @@ -784,7 +789,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,    Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,                                            KnownZero, KnownOne, Depth);    if (NewVal == 0) return false; -  U.set(NewVal); +  U = NewVal;    return true;  } @@ -844,7 +849,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,    if (DemandedMask == 0) {   // Not demanding any bits from V.      if (isa<UndefValue>(V))        return 0; -    return Context->getUndef(VTy); +    return UndefValue::get(VTy);    }    if (Depth == 6)        // Limit search depth. @@ -886,7 +891,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,        // If all of the demanded bits in the inputs are known zeros, return zero.        
if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) -        return Context->getNullValue(VTy); +        return Constant::getNullValue(VTy);      } else if (I->getOpcode() == Instruction::Or) {        // We can simplify (X|Y) -> X or Y in the user's context if we know that @@ -955,10 +960,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,      // If all of the demanded bits in the inputs are known zeros, return zero.      if ((DemandedMask & (RHSKnownZero|LHSKnownZero)) == DemandedMask) -      return Context->getNullValue(VTy); +      return Constant::getNullValue(VTy);      // If the RHS is a constant, see if we can simplify it. -    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero, Context)) +    if (ShrinkDemandedConstant(I, 1, DemandedMask & ~LHSKnownZero))        return I;      // Output known-1 bits are only known if set in both the LHS & RHS. @@ -995,7 +1000,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,        return I->getOperand(1);      // If the RHS is a constant, see if we can simplify it. -    if (ShrinkDemandedConstant(I, 1, DemandedMask, Context)) +    if (ShrinkDemandedConstant(I, 1, DemandedMask))        return I;      // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1030,7 +1035,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,      // other, turn this into an *inclusive* or.      //    e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0      if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { -      Instruction *Or = +      Instruction *Or =           BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1),                                   I->getName());        return InsertNewInstBefore(Or, *I); @@ -1043,7 +1048,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,      if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) {         // all known        if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { -        Constant *AndC = Context->getConstantInt(~RHSKnownOne & DemandedMask); +        Constant *AndC = Constant::getIntegerValue(VTy, +                                                   ~RHSKnownOne & DemandedMask);          Instruction *And =             BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp");          return InsertNewInstBefore(And, *I); @@ -1052,9 +1058,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,      // If the RHS is a constant, see if we can simplify it.      // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. -    if (ShrinkDemandedConstant(I, 1, DemandedMask, Context)) +    if (ShrinkDemandedConstant(I, 1, DemandedMask))        return I; +    // If our LHS is an 'and' and if it has one use, and if any of the bits we +    // are flipping are known to be set, then the xor is just resetting those +    // bits to zero.  We can just knock out bits from the 'and' and the 'xor', +    // simplifying both of them. 
+    if (Instruction *LHSInst = dyn_cast<Instruction>(I->getOperand(0))) +      if (LHSInst->getOpcode() == Instruction::And && LHSInst->hasOneUse() && +          isa<ConstantInt>(I->getOperand(1)) && +          isa<ConstantInt>(LHSInst->getOperand(1)) && +          (LHSKnownOne & RHSKnownOne & DemandedMask) != 0) { +        ConstantInt *AndRHS = cast<ConstantInt>(LHSInst->getOperand(1)); +        ConstantInt *XorRHS = cast<ConstantInt>(I->getOperand(1)); +        APInt NewMask = ~(LHSKnownOne & RHSKnownOne & DemandedMask); +         +        Constant *AndC = +          ConstantInt::get(I->getType(), NewMask & AndRHS->getValue()); +        Instruction *NewAnd =  +          BinaryOperator::CreateAnd(I->getOperand(0), AndC, "tmp"); +        InsertNewInstBefore(NewAnd, *I); +         +        Constant *XorC = +          ConstantInt::get(I->getType(), NewMask & XorRHS->getValue()); +        Instruction *NewXor = +          BinaryOperator::CreateXor(NewAnd, XorC, "tmp"); +        return InsertNewInstBefore(NewXor, *I); +      } +           +                RHSKnownZero = KnownZeroOut;      RHSKnownOne  = KnownOneOut;      break; @@ -1069,8 +1102,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,      assert(!(LHSKnownZero & LHSKnownOne) && "Bits known to be one AND zero?");       // If the operands are constants, see if we can simplify them. -    if (ShrinkDemandedConstant(I, 1, DemandedMask, Context) || -        ShrinkDemandedConstant(I, 2, DemandedMask, Context)) +    if (ShrinkDemandedConstant(I, 1, DemandedMask) || +        ShrinkDemandedConstant(I, 2, DemandedMask))        return I;      // Only known if known in both the LHS and RHS. @@ -1194,7 +1227,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,        // If the RHS of the add has bits set that can't affect the input, reduce        // the constant. 
-      if (ShrinkDemandedConstant(I, 1, InDemandedBits, Context)) +      if (ShrinkDemandedConstant(I, 1, InDemandedBits))          return I;        // Avoid excess work. @@ -1415,10 +1448,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,            Instruction *NewVal;            if (InputBit > ResultBit)              NewVal = BinaryOperator::CreateLShr(I->getOperand(1), -                    Context->getConstantInt(I->getType(), InputBit-ResultBit)); +                    ConstantInt::get(I->getType(), InputBit-ResultBit));            else              NewVal = BinaryOperator::CreateShl(I->getOperand(1), -                    Context->getConstantInt(I->getType(), ResultBit-InputBit)); +                    ConstantInt::get(I->getType(), ResultBit-InputBit));            NewVal->takeName(I);            return InsertNewInstBefore(NewVal, *I);          } @@ -1434,12 +1467,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,    // If the client is only demanding bits that we know, return the known    // constant. -  if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { -    Constant *C = Context->getConstantInt(RHSKnownOne); -    if (isa<PointerType>(V->getType())) -      C = Context->getConstantExprIntToPtr(C, V->getType()); -    return C; -  } +  if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) +    return Constant::getIntegerValue(VTy, RHSKnownOne);    return false;  } @@ -1465,13 +1494,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,      return 0;    } else if (DemandedElts == 0) { // If nothing is demanded, provide undef.      
UndefElts = EltMask; -    return Context->getUndef(V->getType()); +    return UndefValue::get(V->getType());    }    UndefElts = 0;    if (ConstantVector *CP = dyn_cast<ConstantVector>(V)) {      const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); -    Constant *Undef = Context->getUndef(EltTy); +    Constant *Undef = UndefValue::get(EltTy);      std::vector<Constant*> Elts;      for (unsigned i = 0; i != VWidth; ++i) @@ -1486,7 +1515,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,        }      // If we changed the constant, return it. -    Constant *NewCP = Context->getConstantVector(Elts); +    Constant *NewCP = ConstantVector::get(Elts);      return NewCP != CP ? NewCP : 0;    } else if (isa<ConstantAggregateZero>(V)) {      // Simplify the CAZ to a ConstantVector where the non-demanded elements are @@ -1498,15 +1527,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,        return 0;      const Type *EltTy = cast<VectorType>(V->getType())->getElementType(); -    Constant *Zero = Context->getNullValue(EltTy); -    Constant *Undef = Context->getUndef(EltTy); +    Constant *Zero = Constant::getNullValue(EltTy); +    Constant *Undef = UndefValue::get(EltTy);      std::vector<Constant*> Elts;      for (unsigned i = 0; i != VWidth; ++i) {        Constant *Elt = DemandedElts[i] ? Zero : Undef;        Elts.push_back(Elt);      }      UndefElts = DemandedElts ^ EltMask; -    return Context->getConstantVector(Elts); +    return ConstantVector::get(Elts);    }    // Limit search depth. @@ -1553,8 +1582,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,      // If this is inserting an element that isn't demanded, remove this      // insertelement.      
unsigned IdxNo = Idx->getZExtValue(); -    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) -      return AddSoonDeadInstToWorklist(*I, 0); +    if (IdxNo >= VWidth || !DemandedElts[IdxNo]) { +      Worklist.Add(I); +      return I->getOperand(0); +    }      // Otherwise, the element inserted overwrites whatever was there, so the      // input demanded set is simpler than the output set. @@ -1620,12 +1651,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,        std::vector<Constant*> Elts;        for (unsigned i = 0; i < VWidth; ++i) {          if (UndefElts[i]) -          Elts.push_back(Context->getUndef(Type::Int32Ty)); +          Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));          else -          Elts.push_back(Context->getConstantInt(Type::Int32Ty, +          Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),                                            Shuffle->getMaskValue(i)));        } -      I->setOperand(2, Context->getConstantVector(Elts)); +      I->setOperand(2, ConstantVector::get(Elts));        MadeChange = true;      }      break; @@ -1678,7 +1709,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,      UndefElts = UndefElts2;      if (VWidth > InVWidth) { -      assert(0 && "Unimp"); +      llvm_unreachable("Unimp");        // If there are more elements in the result than there are in the source,        // then an output element is undef if the corresponding input element is        // undef. @@ -1686,7 +1717,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,          if (UndefElts2[OutIdx/Ratio])            UndefElts.set(OutIdx);      } else if (VWidth < InVWidth) { -      assert(0 && "Unimp"); +      llvm_unreachable("Unimp");        // If there are more elements in the source than there are in the result,        // then a result element is undef if all of the corresponding input        // elements are undef. 
@@ -1752,11 +1783,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,            Value *LHS = II->getOperand(1);            Value *RHS = II->getOperand(2);            // Extract the element as scalars. -          LHS = InsertNewInstBefore(new ExtractElementInst(LHS, 0U,"tmp"), *II); -          RHS = InsertNewInstBefore(new ExtractElementInst(RHS, 0U,"tmp"), *II); +          LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,  +            ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II); +          RHS = InsertNewInstBefore(ExtractElementInst::Create(RHS, +            ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), "tmp"), *II);            switch (II->getIntrinsicID()) { -          default: assert(0 && "Case stmts out of sync!"); +          default: llvm_unreachable("Case stmts out of sync!");            case Intrinsic::x86_sse_sub_ss:            case Intrinsic::x86_sse2_sub_sd:              TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS, @@ -1771,9 +1804,9 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,            Instruction *New =              InsertElementInst::Create( -              Context->getUndef(II->getType()), TmpV, 0U, II->getName()); +              UndefValue::get(II->getType()), TmpV, +              ConstantInt::get(Type::getInt32Ty(*Context), 0U, false), II->getName());            InsertNewInstBefore(New, *II); -          AddSoonDeadInstToWorklist(*II, 0);            return New;          }                    } @@ -1799,8 +1832,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,  /// 'shouldApply' and 'apply' methods.  
///  template<typename Functor> -static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F, -                                   LLVMContext* Context) { +static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F) {    unsigned Opcode = Root.getOpcode();    Value *LHS = Root.getOperand(0); @@ -1833,7 +1865,7 @@ static Instruction *AssociativeOpt(BinaryOperator &Root, const Functor &F,        // Make what used to be the LHS of the root be the user of the root...        Value *ExtraOperand = TmpLHSI->getOperand(1);        if (&Root == TmpLHSI) { -        Root.replaceAllUsesWith(Context->getNullValue(TmpLHSI->getType())); +        Root.replaceAllUsesWith(Constant::getNullValue(TmpLHSI->getType()));          return 0;        }        Root.replaceAllUsesWith(TmpLHSI);          // Users now use TmpLHSI @@ -1872,12 +1904,11 @@ namespace {  // AddRHS - Implements: X + X --> X << 1  struct AddRHS {    Value *RHS; -  LLVMContext* Context; -  AddRHS(Value *rhs, LLVMContext* C) : RHS(rhs), Context(C) {} +  explicit AddRHS(Value *rhs) : RHS(rhs) {}    bool shouldApply(Value *LHS) const { return LHS == RHS; }    Instruction *apply(BinaryOperator &Add) const {      return BinaryOperator::CreateShl(Add.getOperand(0), -                                     Context->getConstantInt(Add.getType(), 1)); +                                     ConstantInt::get(Add.getType(), 1));    }  }; @@ -1885,12 +1916,11 @@ struct AddRHS {  //                 iff C1&C2 == 0  struct AddMaskingAnd {    Constant *C2; -  LLVMContext* Context; -  AddMaskingAnd(Constant *c, LLVMContext* C) : C2(c), Context(C) {} +  explicit AddMaskingAnd(Constant *c) : C2(c) {}    bool shouldApply(Value *LHS) const {      ConstantInt *C1;      return match(LHS, m_And(m_Value(), m_ConstantInt(C1))) && -           Context->getConstantExprAnd(C1, C2)->isNullValue(); +           ConstantExpr::getAnd(C1, C2)->isNullValue();    }    Instruction *apply(BinaryOperator &Add) const {      return 
BinaryOperator::CreateOr(Add.getOperand(0), Add.getOperand(1)); @@ -1901,11 +1931,8 @@ struct AddMaskingAnd {  static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,                                               InstCombiner *IC) { -  LLVMContext* Context = IC->getContext(); -   -  if (CastInst *CI = dyn_cast<CastInst>(&I)) { -    return IC->InsertCastBefore(CI->getOpcode(), SO, I.getType(), I); -  } +  if (CastInst *CI = dyn_cast<CastInst>(&I)) +    return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());    // Figure out if the constant is the left or the right argument.    bool ConstIsRHS = isa<Constant>(I.getOperand(1)); @@ -1913,24 +1940,24 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,    if (Constant *SOC = dyn_cast<Constant>(SO)) {      if (ConstIsRHS) -      return Context->getConstantExpr(I.getOpcode(), SOC, ConstOperand); -    return Context->getConstantExpr(I.getOpcode(), ConstOperand, SOC); +      return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand); +    return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);    }    Value *Op0 = SO, *Op1 = ConstOperand;    if (!ConstIsRHS)      std::swap(Op0, Op1); -  Instruction *New; +      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) -    New = BinaryOperator::Create(BO->getOpcode(), Op0, Op1,SO->getName()+".op"); -  else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) -    New = CmpInst::Create(CI->getOpcode(), CI->getPredicate(), Op0, Op1,  -                          SO->getName()+".cmp"); -  else { -    assert(0 && "Unknown binary instruction type!"); -    abort(); -  } -  return IC->InsertNewInstBefore(New, I); +    return IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, +                                    SO->getName()+".op"); +  if (ICmpInst *CI = dyn_cast<ICmpInst>(&I)) +    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, +                                   SO->getName()+".cmp"); +  if (FCmpInst *CI = dyn_cast<FCmpInst>(&I)) 
+    return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1, +                                   SO->getName()+".cmp"); +  llvm_unreachable("Unknown binary instruction type!");  }  // FoldOpIntoSelect - Given an instruction with a select as one operand and a @@ -1946,7 +1973,7 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,    if (isa<Constant>(TV) || isa<Constant>(FV)) {      // Bool selects with constant operands can be folded to logical ops. -    if (SI->getType() == Type::Int1Ty) return 0; +    if (SI->getType() == Type::getInt1Ty(*IC->getContext())) return 0;      Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, IC);      Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, IC); @@ -1958,20 +1985,34 @@ static Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,  } -/// FoldOpIntoPhi - Given a binary operator or cast instruction which has a PHI -/// node as operand #0, see if we can fold the instruction into the PHI (which -/// is only possible if all operands to the PHI are constants). -Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { +/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which +/// has a PHI node as operand #0, see if we can fold the instruction into the +/// PHI (which is only possible if all operands to the PHI are constants). +/// +/// If AllowAggressive is true, FoldOpIntoPhi will allow certain transforms +/// that would normally be unprofitable because they strongly encourage jump +/// threading. +Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I, +                                         bool AllowAggressive) { +  AllowAggressive = false;    PHINode *PN = cast<PHINode>(I.getOperand(0));    unsigned NumPHIValues = PN->getNumIncomingValues(); -  if (!PN->hasOneUse() || NumPHIValues == 0) return 0; - -  // Check to see if all of the operands of the PHI are constants.  If there is -  // one non-constant value, remember the BB it is.  
If there is more than one -  // or if *it* is a PHI, bail out. +  if (NumPHIValues == 0 || +      // We normally only transform phis with a single use, unless we're trying +      // hard to make jump threading happen. +      (!PN->hasOneUse() && !AllowAggressive)) +    return 0; +   +   +  // Check to see if all of the operands of the PHI are simple constants +  // (constantint/constantfp/undef).  If there is one non-constant value, +  // remember the BB it is in.  If there is more than one or if *it* is a PHI, +  // bail out.  We don't do arbitrary constant expressions here because moving +  // their computation can be expensive without a cost model.    BasicBlock *NonConstBB = 0;    for (unsigned i = 0; i != NumPHIValues; ++i) -    if (!isa<Constant>(PN->getIncomingValue(i))) { +    if (!isa<Constant>(PN->getIncomingValue(i)) || +        isa<ConstantExpr>(PN->getIncomingValue(i))) {        if (NonConstBB) return 0;  // More than one non-const value.        if (isa<PHINode>(PN->getIncomingValue(i))) return 0;  // Itself a phi.        NonConstBB = PN->getIncomingBlock(i); @@ -1986,7 +2027,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {    // operation in that block.  However, if this is a critical edge, we would be    // inserting the computation one some other paths (e.g. inside a loop).  Only    // do this if the pred block is unconditionally branching into the phi block. -  if (NonConstBB) { +  if (NonConstBB != 0 && !AllowAggressive) {      BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());      if (!BI || !BI->isUnconditional()) return 0;    } @@ -1998,15 +2039,37 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {    NewPN->takeName(PN);    // Next, add all of the operands to the PHI. -  if (I.getNumOperands() == 2) { +  if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { +    // We only currently try to fold the condition of a select when it is a phi, +    // not the true/false values. 
+    Value *TrueV = SI->getTrueValue(); +    Value *FalseV = SI->getFalseValue(); +    BasicBlock *PhiTransBB = PN->getParent(); +    for (unsigned i = 0; i != NumPHIValues; ++i) { +      BasicBlock *ThisBB = PN->getIncomingBlock(i); +      Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB); +      Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB); +      Value *InV = 0; +      if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { +        InV = InC->isNullValue() ? FalseVInPred : TrueVInPred; +      } else { +        assert(PN->getIncomingBlock(i) == NonConstBB); +        InV = SelectInst::Create(PN->getIncomingValue(i), TrueVInPred, +                                 FalseVInPred, +                                 "phitmp", NonConstBB->getTerminator()); +        Worklist.Add(cast<Instruction>(InV)); +      } +      NewPN->addIncoming(InV, ThisBB); +    } +  } else if (I.getNumOperands() == 2) {      Constant *C = cast<Constant>(I.getOperand(1));      for (unsigned i = 0; i != NumPHIValues; ++i) {        Value *InV = 0;        if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) {          if (CmpInst *CI = dyn_cast<CmpInst>(&I)) -          InV = Context->getConstantExprCompare(CI->getPredicate(), InC, C); +          InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);          else -          InV = Context->getConstantExpr(I.getOpcode(), InC, C); +          InV = ConstantExpr::get(I.getOpcode(), InC, C);        } else {          assert(PN->getIncomingBlock(i) == NonConstBB);          if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I))  @@ -2014,14 +2077,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {                                         PN->getIncomingValue(i), C, "phitmp",                                         NonConstBB->getTerminator());          else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) -          InV = CmpInst::Create(CI->getOpcode(),  +          InV = 
CmpInst::Create(CI->getOpcode(),                                  CI->getPredicate(),                                  PN->getIncomingValue(i), C, "phitmp",                                  NonConstBB->getTerminator());          else -          assert(0 && "Unknown binop!"); +          llvm_unreachable("Unknown binop!"); -        AddToWorkList(cast<Instruction>(InV)); +        Worklist.Add(cast<Instruction>(InV));        }        NewPN->addIncoming(InV, PN->getIncomingBlock(i));      } @@ -2031,13 +2094,13 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {      for (unsigned i = 0; i != NumPHIValues; ++i) {        Value *InV;        if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) { -        InV = Context->getConstantExprCast(CI->getOpcode(), InC, RetTy); +        InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy);        } else {          assert(PN->getIncomingBlock(i) == NonConstBB);          InV = CastInst::Create(CI->getOpcode(), PN->getIncomingValue(i),                                  I.getType(), "phitmp",                                  NonConstBB->getTerminator()); -        AddToWorkList(cast<Instruction>(InV)); +        Worklist.Add(cast<Instruction>(InV));        }        NewPN->addIncoming(InV, PN->getIncomingBlock(i));      } @@ -2098,13 +2161,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {        if (SimplifyDemandedInstructionBits(I))          return &I; -      // zext(i1) - 1  ->  select i1, 0, -1 +      // zext(bool) + C -> bool ? 
C + 1 : C        if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) -        if (CI->isAllOnesValue() && -            ZI->getOperand(0)->getType() == Type::Int1Ty) -          return SelectInst::Create(ZI->getOperand(0), -                                    Context->getNullValue(I.getType()), -                              Context->getConstantIntAllOnesValue(I.getType())); +        if (ZI->getSrcTy() == Type::getInt1Ty(*Context)) +          return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);      }      if (isa<PHINode>(LHS)) @@ -2146,24 +2206,23 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {        const Type *MiddleType = 0;        switch (Size) {          default: break; -        case 32: MiddleType = Type::Int32Ty; break; -        case 16: MiddleType = Type::Int16Ty; break; -        case  8: MiddleType = Type::Int8Ty; break; +        case 32: MiddleType = Type::getInt32Ty(*Context); break; +        case 16: MiddleType = Type::getInt16Ty(*Context); break; +        case  8: MiddleType = Type::getInt8Ty(*Context); break;        }        if (MiddleType) { -        Instruction *NewTrunc = new TruncInst(XorLHS, MiddleType, "sext"); -        InsertNewInstBefore(NewTrunc, I); +        Value *NewTrunc = Builder->CreateTrunc(XorLHS, MiddleType, "sext");          return new SExtInst(NewTrunc, I.getType(), I.getName());        }      }    } -  if (I.getType() == Type::Int1Ty) +  if (I.getType() == Type::getInt1Ty(*Context))      return BinaryOperator::CreateXor(LHS, RHS);    // X + X --> X << 1    if (I.getType()->isInteger()) { -    if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS, Context), Context)) +    if (Instruction *Result = AssociativeOpt(I, AddRHS(RHS)))        return Result;      if (Instruction *RHSI = dyn_cast<Instruction>(RHS)) { @@ -2180,11 +2239,10 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {    // -A + B  -->  B - A    // -A + -B  -->  -(A + B) -  if (Value *LHSV = dyn_castNegVal(LHS, Context)) { +  if (Value *LHSV = 
dyn_castNegVal(LHS)) {      if (LHS->getType()->isIntOrIntVector()) { -      if (Value *RHSV = dyn_castNegVal(RHS, Context)) { -        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSV, RHSV, "sum"); -        InsertNewInstBefore(NewAdd, I); +      if (Value *RHSV = dyn_castNegVal(RHS)) { +        Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");          return BinaryOperator::CreateNeg(NewAdd);        }      } @@ -2194,34 +2252,34 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {    // A + -B  -->  A - B    if (!isa<Constant>(RHS)) -    if (Value *V = dyn_castNegVal(RHS, Context)) +    if (Value *V = dyn_castNegVal(RHS))        return BinaryOperator::CreateSub(LHS, V);    ConstantInt *C2; -  if (Value *X = dyn_castFoldableMul(LHS, C2, Context)) { +  if (Value *X = dyn_castFoldableMul(LHS, C2)) {      if (X == RHS)   // X*C + X --> X * (C+1) -      return BinaryOperator::CreateMul(RHS, AddOne(C2, Context)); +      return BinaryOperator::CreateMul(RHS, AddOne(C2));      // X*C1 + X*C2 --> X * (C1+C2)      ConstantInt *C1; -    if (X == dyn_castFoldableMul(RHS, C1, Context)) -      return BinaryOperator::CreateMul(X, Context->getConstantExprAdd(C1, C2)); +    if (X == dyn_castFoldableMul(RHS, C1)) +      return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));    }    // X + X*C --> X * (C+1) -  if (dyn_castFoldableMul(RHS, C2, Context) == LHS) -    return BinaryOperator::CreateMul(LHS, AddOne(C2, Context)); +  if (dyn_castFoldableMul(RHS, C2) == LHS) +    return BinaryOperator::CreateMul(LHS, AddOne(C2));    // X + ~X --> -1   since   ~X = -X-1 -  if (dyn_castNotVal(LHS, Context) == RHS || -      dyn_castNotVal(RHS, Context) == LHS) -    return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); +  if (dyn_castNotVal(LHS) == RHS || +      dyn_castNotVal(RHS) == LHS) +    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));    // (A & C1)+(B & C2) --> (A & C1)|(B & C2) iff C1&C2 == 0    if (match(RHS, 
m_And(m_Value(), m_ConstantInt(C2)))) -    if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2, Context), Context)) +    if (Instruction *R = AssociativeOpt(I, AddMaskingAnd(C2)))        return R;    // A+B --> A|B iff A and B have no bits set in common. @@ -2258,8 +2316,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {        }        if (W == Y) { -        Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, Z, -                                                            LHS->getName()), I); +        Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());          return BinaryOperator::CreateMul(W, NewAdd);        }      } @@ -2268,11 +2325,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {    if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) {      Value *X = 0;      if (match(LHS, m_Not(m_Value(X))))    // ~X + C --> (C-1) - X -      return BinaryOperator::CreateSub(SubOne(CRHS, Context), X); +      return BinaryOperator::CreateSub(SubOne(CRHS), X);      // (X & FF00) + xx00  -> (X+xx00) & FF00 -    if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { -      Constant *Anded = Context->getConstantExprAnd(CRHS, C2); +    if (LHS->hasOneUse() && +        match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) { +      Constant *Anded = ConstantExpr::getAnd(CRHS, C2);        if (Anded == CRHS) {          // See if all bits from the first bit set in the Add RHS up are included          // in the mask.  First, get the rightmost bit. @@ -2286,8 +2344,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {          if (AddRHSHighBits == AddRHSHighBitsAnd) {            // Okay, the xform is safe.  Insert the new add pronto. 
-          Value *NewAdd = InsertNewInstBefore(BinaryOperator::CreateAdd(X, CRHS, -                                                            LHS->getName()), I); +          Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());            return BinaryOperator::CreateAnd(NewAdd, C2);          }        } @@ -2299,28 +2356,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {          return R;    } -  // add (cast *A to intptrtype) B ->  -  //   cast (GEP (cast *A to i8*) B)  -->  intptrtype -  { -    CastInst *CI = dyn_cast<CastInst>(LHS); -    Value *Other = RHS; -    if (!CI) { -      CI = dyn_cast<CastInst>(RHS); -      Other = LHS; -    } -    if (CI && CI->getType()->isSized() &&  -        (CI->getType()->getScalarSizeInBits() == -         TD->getIntPtrType()->getPrimitiveSizeInBits())  -        && isa<PointerType>(CI->getOperand(0)->getType())) { -      unsigned AS = -        cast<PointerType>(CI->getOperand(0)->getType())->getAddressSpace(); -      Value *I2 = InsertBitCastBefore(CI->getOperand(0), -                                  Context->getPointerType(Type::Int8Ty, AS), I); -      I2 = InsertNewInstBefore(GetElementPtrInst::Create(I2, Other, "ctg2"), I); -      return new PtrToIntInst(I2, CI->getType()); -    } -  } -      // add (select X 0 (sub n A)) A  -->  select X A n    {      SelectInst *SI = dyn_cast<SelectInst>(LHS); @@ -2336,10 +2371,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {        // Can we fold the add into the argument of the select?        // We check both true and false select arguments for a matching subtract. -      if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) +      if (match(FV, m_Zero()) && +          match(TV, m_Sub(m_Value(N), m_Specific(A))))          // Fold the add into the true select value.          
return SelectInst::Create(SI->getCondition(), N, A); -      if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) +      if (match(TV, m_Zero()) && +          match(FV, m_Sub(m_Value(N), m_Specific(A))))          // Fold the add into the false select value.          return SelectInst::Create(SI->getCondition(), A, N);      } @@ -2351,14 +2388,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {      // (add (sext x), cst) --> (sext (add x, cst'))      if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {        Constant *CI =  -        Context->getConstantExprTrunc(RHSC, LHSConv->getOperand(0)->getType()); +        ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());        if (LHSConv->hasOneUse() && -          Context->getConstantExprSExt(CI, I.getType()) == RHSC && +          ConstantExpr::getSExt(CI, I.getType()) == RHSC &&            WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {          // Insert the new, smaller add. -        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),  -                                                        CI, "addconv"); -        InsertNewInstBefore(NewAdd, I); +        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),  +                                           CI, "addconv");          return new SExtInst(NewAdd, I.getType());        }      } @@ -2373,10 +2409,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {            WillNotOverflowSignedAdd(LHSConv->getOperand(0),                                     RHSConv->getOperand(0))) {          // Insert the new integer add. 
-        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),  -                                                        RHSConv->getOperand(0), -                                                        "addconv"); -        InsertNewInstBefore(NewAdd, I); +        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),  +                                           RHSConv->getOperand(0), "addconv");          return new SExtInst(NewAdd, I.getType());        }      } @@ -2392,7 +2426,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {    if (Constant *RHSC = dyn_cast<Constant>(RHS)) {      // X + 0 --> X      if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { -      if (CFP->isExactlyValue(Context->getConstantFPNegativeZero +      if (CFP->isExactlyValue(ConstantFP::getNegativeZero                                (I.getType())->getValueAPF()))          return ReplaceInstUsesWith(I, LHS);      } @@ -2404,12 +2438,12 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {    // -A + B  -->  B - A    // -A + -B  -->  -(A + B) -  if (Value *LHSV = dyn_castFNegVal(LHS, Context)) +  if (Value *LHSV = dyn_castFNegVal(LHS))      return BinaryOperator::CreateFSub(RHS, LHSV);    // A + -B  -->  A - B    if (!isa<Constant>(RHS)) -    if (Value *V = dyn_castFNegVal(RHS, Context)) +    if (Value *V = dyn_castFNegVal(RHS))        return BinaryOperator::CreateFSub(LHS, V);    // Check for X+0.0.  Simplify it to X if we know X is not -0.0. @@ -2427,14 +2461,13 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {      // instcombined.      
if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) {        Constant *CI =  -      Context->getConstantExprFPToSI(CFP, LHSConv->getOperand(0)->getType()); +      ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType());        if (LHSConv->hasOneUse() && -          Context->getConstantExprSIToFP(CI, I.getType()) == CFP && +          ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&            WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) {          // Insert the new integer add. -        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),  -                                                        CI, "addconv"); -        InsertNewInstBefore(NewAdd, I); +        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0), +                                           CI, "addconv");          return new SIToFPInst(NewAdd, I.getType());        }      } @@ -2449,10 +2482,8 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {            WillNotOverflowSignedAdd(LHSConv->getOperand(0),                                     RHSConv->getOperand(0))) {          // Insert the new integer add. 
-        Instruction *NewAdd = BinaryOperator::CreateAdd(LHSConv->getOperand(0),  -                                                        RHSConv->getOperand(0), -                                                        "addconv"); -        InsertNewInstBefore(NewAdd, I); +        Value *NewAdd = Builder->CreateAdd(LHSConv->getOperand(0),  +                                           RHSConv->getOperand(0), "addconv");          return new SIToFPInst(NewAdd, I.getType());        }      } @@ -2465,10 +2496,10 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);    if (Op0 == Op1)                        // sub X, X  -> 0 -    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));    // If this is a 'B = x-(-A)', change to B = x+A... -  if (Value *V = dyn_castNegVal(Op1, Context)) +  if (Value *V = dyn_castNegVal(Op1))      return BinaryOperator::CreateAdd(Op0, V);    if (isa<UndefValue>(Op0)) @@ -2484,7 +2515,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {      // C - ~X == X + (1+C)      Value *X = 0;      if (match(Op1, m_Not(m_Value(X)))) -      return BinaryOperator::CreateAdd(X, AddOne(C, Context)); +      return BinaryOperator::CreateAdd(X, AddOne(C));      // -(X >>u 31) -> (X >>s 31)      // -(X >>s 31) -> (X >>u 31) @@ -2519,22 +2550,29 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {      if (SelectInst *SI = dyn_cast<SelectInst>(Op1))        if (Instruction *R = FoldOpIntoSelect(I, SI, this))          return R; + +    // C - zext(bool) -> bool ? 
C - 1 : C +    if (ZExtInst *ZI = dyn_cast<ZExtInst>(Op1)) +      if (ZI->getSrcTy() == Type::getInt1Ty(*Context)) +        return SelectInst::Create(ZI->getOperand(0), SubOne(C), C);    } -  if (I.getType() == Type::Int1Ty) +  if (I.getType() == Type::getInt1Ty(*Context))      return BinaryOperator::CreateXor(Op0, Op1);    if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {      if (Op1I->getOpcode() == Instruction::Add) {        if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y -        return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName()); +        return BinaryOperator::CreateNeg(Op1I->getOperand(1), +                                         I.getName());        else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y -        return BinaryOperator::CreateNeg(Op1I->getOperand(0), I.getName()); +        return BinaryOperator::CreateNeg(Op1I->getOperand(0), +                                         I.getName());        else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {          if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))            // C1-(X+C2) --> (C1-C2)-X            return BinaryOperator::CreateSub( -            Context->getConstantExprSub(CI1, CI2), Op1I->getOperand(0)); +            ConstantExpr::getSub(CI1, CI2), Op1I->getOperand(0));        }      } @@ -2558,8 +2596,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {            (Op1I->getOperand(0) == Op0 || Op1I->getOperand(1) == Op0)) {          Value *OtherOp = Op1I->getOperand(Op1I->getOperand(0) == Op0); -        Value *NewNot = -          InsertNewInstBefore(BinaryOperator::CreateNot(OtherOp, "B.not"), I); +        Value *NewNot = Builder->CreateNot(OtherOp, "B.not");          return BinaryOperator::CreateAnd(Op0, NewNot);        } @@ -2569,13 +2606,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {            if (CSI->isZero())              if (Constant *DivRHS = 
dyn_cast<Constant>(Op1I->getOperand(1)))                return BinaryOperator::CreateSDiv(Op1I->getOperand(0), -                                          Context->getConstantExprNeg(DivRHS)); +                                          ConstantExpr::getNeg(DivRHS));        // X - X*C --> X * (1-C)        ConstantInt *C2 = 0; -      if (dyn_castFoldableMul(Op1I, C2, Context) == Op0) { +      if (dyn_castFoldableMul(Op1I, C2) == Op0) {          Constant *CP1 =  -          Context->getConstantExprSub(Context->getConstantInt(I.getType(), 1), +          ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),                                               C2);          return BinaryOperator::CreateMul(Op0, CP1);        } @@ -2590,18 +2627,19 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {          return ReplaceInstUsesWith(I, Op0I->getOperand(0));      } else if (Op0I->getOpcode() == Instruction::Sub) {        if (Op0I->getOperand(0) == Op1)             // (X-Y)-X == -Y -        return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName()); +        return BinaryOperator::CreateNeg(Op0I->getOperand(1), +                                         I.getName());      }    }    ConstantInt *C1; -  if (Value *X = dyn_castFoldableMul(Op0, C1, Context)) { +  if (Value *X = dyn_castFoldableMul(Op0, C1)) {      if (X == Op1)  // X*C - X --> X * (C-1) -      return BinaryOperator::CreateMul(Op1, SubOne(C1, Context)); +      return BinaryOperator::CreateMul(Op1, SubOne(C1));      ConstantInt *C2;   // X*C1 - X*C2 -> X * (C1-C2) -    if (X == dyn_castFoldableMul(Op1, C2, Context)) -      return BinaryOperator::CreateMul(X, Context->getConstantExprSub(C1, C2)); +    if (X == dyn_castFoldableMul(Op1, C2)) +      return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));    }    return 0;  } @@ -2610,15 +2648,17 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);    // If this is a 'B = x-(-A)', 
change to B = x+A... -  if (Value *V = dyn_castFNegVal(Op1, Context)) +  if (Value *V = dyn_castFNegVal(Op1))      return BinaryOperator::CreateFAdd(Op0, V);    if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) {      if (Op1I->getOpcode() == Instruction::FAdd) {        if (Op1I->getOperand(0) == Op0)              // X-(X+Y) == -Y -        return BinaryOperator::CreateFNeg(Op1I->getOperand(1), I.getName()); +        return BinaryOperator::CreateFNeg(Op1I->getOperand(1), +                                          I.getName());        else if (Op1I->getOperand(1) == Op0)         // X-(Y+X) == -Y -        return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName()); +        return BinaryOperator::CreateFNeg(Op1I->getOperand(0), +                                          I.getName());      }    } @@ -2657,26 +2697,24 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,  Instruction *InstCombiner::visitMul(BinaryOperator &I) {    bool Changed = SimplifyCommutative(I); -  Value *Op0 = I.getOperand(0); +  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); -  // TODO: If Op1 is undef and Op0 is finite, return zero. -  if (!I.getType()->isFPOrFPVector() && -      isa<UndefValue>(I.getOperand(1)))              // undef * X -> 0 -    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +  if (isa<UndefValue>(Op1))              // undef * X -> 0 +    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); -  // Simplify mul instructions with a constant RHS... -  if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) { -    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { +  // Simplify mul instructions with a constant RHS. 
+  if (Constant *Op1C = dyn_cast<Constant>(Op1)) { +    if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1C)) {        // ((X << C1)*C2) == (X * (C2 << C1))        if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))          if (SI->getOpcode() == Instruction::Shl)            if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))              return BinaryOperator::CreateMul(SI->getOperand(0), -                                        Context->getConstantExprShl(CI, ShOp)); +                                        ConstantExpr::getShl(CI, ShOp));        if (CI->isZero()) -        return ReplaceInstUsesWith(I, Op1);  // X * 0  == 0 +        return ReplaceInstUsesWith(I, Op1C);  // X * 0  == 0        if (CI->equalsInt(1))                  // X * 1  == X          return ReplaceInstUsesWith(I, Op0);        if (CI->isAllOnesValue())              // X * -1 == 0 - X @@ -2685,12 +2723,13 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {        const APInt& Val = cast<ConstantInt>(CI)->getValue();        if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C          return BinaryOperator::CreateShl(Op0, -                 Context->getConstantInt(Op0->getType(), Val.logBase2())); +                 ConstantInt::get(Op0->getType(), Val.logBase2()));        } -    } else if (isa<VectorType>(Op1->getType())) { -      // TODO: If Op1 is all zeros and Op0 is all finite, return all zeros. 
+    } else if (isa<VectorType>(Op1C->getType())) { +      if (Op1C->isNullValue()) +        return ReplaceInstUsesWith(I, Op1C); -      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { +      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {          if (Op1V->isAllOnesValue())              // X * -1 == 0 - X            return BinaryOperator::CreateNeg(Op0, I.getName()); @@ -2705,13 +2744,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {      if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0))        if (Op0I->getOpcode() == Instruction::Add && Op0I->hasOneUse() && -          isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1)) { +          isa<ConstantInt>(Op0I->getOperand(1)) && isa<ConstantInt>(Op1C)) {          // Canonicalize (X+C1)*C2 -> X*C2+C1*C2. -        Instruction *Add = BinaryOperator::CreateMul(Op0I->getOperand(0), -                                                     Op1, "tmp"); -        InsertNewInstBefore(Add, I); -        Value *C1C2 = Context->getConstantExprMul(Op1,  -                                           cast<Constant>(Op0I->getOperand(1))); +        Value *Add = Builder->CreateMul(Op0I->getOperand(0), Op1C, "tmp"); +        Value *C1C2 = Builder->CreateMul(Op1C, Op0I->getOperand(1));          return BinaryOperator::CreateAdd(Add, C1C2);        } @@ -2726,93 +2762,80 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {          return NV;    } -  if (Value *Op0v = dyn_castNegVal(Op0, Context))     // -X * -Y = X*Y -    if (Value *Op1v = dyn_castNegVal(I.getOperand(1), Context)) +  if (Value *Op0v = dyn_castNegVal(Op0))     // -X * -Y = X*Y +    if (Value *Op1v = dyn_castNegVal(Op1))        return BinaryOperator::CreateMul(Op0v, Op1v);    // (X / Y) *  Y = X - (X % Y)    // (X / Y) * -Y = (X % Y) - X    { -    Value *Op1 = I.getOperand(1); +    Value *Op1C = Op1;      BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);      if (!BO ||          (BO->getOpcode() != Instruction::UDiv 
&&            BO->getOpcode() != Instruction::SDiv)) { -      Op1 = Op0; -      BO = dyn_cast<BinaryOperator>(I.getOperand(1)); +      Op1C = Op0; +      BO = dyn_cast<BinaryOperator>(Op1);      } -    Value *Neg = dyn_castNegVal(Op1, Context); +    Value *Neg = dyn_castNegVal(Op1C);      if (BO && BO->hasOneUse() && -        (BO->getOperand(1) == Op1 || BO->getOperand(1) == Neg) && +        (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&          (BO->getOpcode() == Instruction::UDiv ||           BO->getOpcode() == Instruction::SDiv)) {        Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1); -      Instruction *Rem; +      // If the division is exact, X % Y is zero. +      if (SDivOperator *SDiv = dyn_cast<SDivOperator>(BO)) +        if (SDiv->isExact()) { +          if (Op1BO == Op1C) +            return ReplaceInstUsesWith(I, Op0BO); +          return BinaryOperator::CreateNeg(Op0BO); +        } + +      Value *Rem;        if (BO->getOpcode() == Instruction::UDiv) -        Rem = BinaryOperator::CreateURem(Op0BO, Op1BO); +        Rem = Builder->CreateURem(Op0BO, Op1BO);        else -        Rem = BinaryOperator::CreateSRem(Op0BO, Op1BO); - -      InsertNewInstBefore(Rem, I); +        Rem = Builder->CreateSRem(Op0BO, Op1BO);        Rem->takeName(BO); -      if (Op1BO == Op1) +      if (Op1BO == Op1C)          return BinaryOperator::CreateSub(Op0BO, Rem); -      else -        return BinaryOperator::CreateSub(Rem, Op0BO); +      return BinaryOperator::CreateSub(Rem, Op0BO);      }    } -  if (I.getType() == Type::Int1Ty) -    return BinaryOperator::CreateAnd(Op0, I.getOperand(1)); +  /// i1 mul -> i1 and. 
+  if (I.getType() == Type::getInt1Ty(*Context)) +    return BinaryOperator::CreateAnd(Op0, Op1); +  // X*(1 << Y) --> X << Y +  // (1 << Y)*X --> X << Y +  { +    Value *Y; +    if (match(Op0, m_Shl(m_One(), m_Value(Y)))) +      return BinaryOperator::CreateShl(Op1, Y); +    if (match(Op1, m_Shl(m_One(), m_Value(Y)))) +      return BinaryOperator::CreateShl(Op0, Y); +  } +      // If one of the operands of the multiply is a cast from a boolean value, then    // we know the bool is either zero or one, so this is a 'masking' multiply. -  // See if we can simplify things based on how the boolean was originally -  // formed. -  CastInst *BoolCast = 0; -  if (ZExtInst *CI = dyn_cast<ZExtInst>(Op0)) -    if (CI->getOperand(0)->getType() == Type::Int1Ty) -      BoolCast = CI; -  if (!BoolCast) -    if (ZExtInst *CI = dyn_cast<ZExtInst>(I.getOperand(1))) -      if (CI->getOperand(0)->getType() == Type::Int1Ty) -        BoolCast = CI; -  if (BoolCast) { -    if (ICmpInst *SCI = dyn_cast<ICmpInst>(BoolCast->getOperand(0))) { -      Value *SCIOp0 = SCI->getOperand(0), *SCIOp1 = SCI->getOperand(1); -      const Type *SCOpTy = SCIOp0->getType(); -      bool TIS = false; -       -      // If the icmp is true iff the sign bit of X is set, then convert this -      // multiply into a shift/and combination. -      if (isa<ConstantInt>(SCIOp1) && -          isSignBitCheck(SCI->getPredicate(), cast<ConstantInt>(SCIOp1), TIS) && -          TIS) { -        // Shift the X value right to turn it into "all signbits". 
-        Constant *Amt = Context->getConstantInt(SCIOp0->getType(), -                                          SCOpTy->getPrimitiveSizeInBits()-1); -        Value *V = -          InsertNewInstBefore( -            BinaryOperator::Create(Instruction::AShr, SCIOp0, Amt, -                                            BoolCast->getOperand(0)->getName()+ -                                            ".mask"), I); - -        // If the multiply type is not the same as the source type, sign extend -        // or truncate to the multiply type. -        if (I.getType() != V->getType()) { -          uint32_t SrcBits = V->getType()->getPrimitiveSizeInBits(); -          uint32_t DstBits = I.getType()->getPrimitiveSizeInBits(); -          Instruction::CastOps opcode =  -            (SrcBits == DstBits ? Instruction::BitCast :  -             (SrcBits < DstBits ? Instruction::SExt : Instruction::Trunc)); -          V = InsertCastBefore(opcode, V, I.getType(), I); -        } +  //   X * Y (where Y is 0 or 1) -> X & (0-Y) +  if (!isa<VectorType>(I.getType())) { +    // -2 is "-1 << 1" so it is all bits set except the low one. +    APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); +     +    Value *BoolCast = 0, *OtherOp = 0; +    if (MaskedValueIsZero(Op0, Negative2)) +      BoolCast = Op0, OtherOp = Op1; +    else if (MaskedValueIsZero(Op1, Negative2)) +      BoolCast = Op1, OtherOp = Op0; -        Value *OtherOp = Op0 == BoolCast ? 
I.getOperand(1) : Op0; -        return BinaryOperator::CreateAnd(V, OtherOp); -      } +    if (BoolCast) { +      Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), +                                    BoolCast, "tmp"); +      return BinaryOperator::CreateAnd(V, OtherOp);      }    } @@ -2821,17 +2844,17 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {  Instruction *InstCombiner::visitFMul(BinaryOperator &I) {    bool Changed = SimplifyCommutative(I); -  Value *Op0 = I.getOperand(0); +  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);    // Simplify mul instructions with a constant RHS... -  if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) { -    if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) { +  if (Constant *Op1C = dyn_cast<Constant>(Op1)) { +    if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {        // "In IEEE floating point, x*1 is not equivalent to x for nans.  However,        // ANSI says we can drop signals, so we can do this anyway." (from GCC)        if (Op1F->isExactlyValue(1.0))          return ReplaceInstUsesWith(I, Op0);  // Eliminate 'mul double %X, 1.0' -    } else if (isa<VectorType>(Op1->getType())) { -      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { +    } else if (isa<VectorType>(Op1C->getType())) { +      if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {          // As above, vector X*splat(1.0) -> X in all defined cases.          if (Constant *Splat = Op1V->getSplatValue()) {            if (ConstantFP *F = dyn_cast<ConstantFP>(Splat)) @@ -2851,8 +2874,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {          return NV;    } -  if (Value *Op0v = dyn_castFNegVal(Op0, Context))     // -X * -Y = X*Y -    if (Value *Op1v = dyn_castFNegVal(I.getOperand(1), Context)) +  if (Value *Op0v = dyn_castFNegVal(Op0))     // -X * -Y = X*Y +    if (Value *Op1v = dyn_castFNegVal(Op1))        return BinaryOperator::CreateFMul(Op0v, Op1v);    return Changed ? 
&I : 0; @@ -2907,11 +2930,11 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {           I != E; ++I) {        if (*I == SI) {          *I = SI->getOperand(NonNullOperand); -        AddToWorkList(BBI); +        Worklist.Add(BBI);        } else if (*I == SelectCond) { -        *I = NonNullOperand == 1 ? Context->getConstantIntTrue() : -                                   Context->getConstantIntFalse(); -        AddToWorkList(BBI); +        *I = NonNullOperand == 1 ? ConstantInt::getTrue(*Context) : +                                   ConstantInt::getFalse(*Context); +        Worklist.Add(BBI);        }      } @@ -2942,7 +2965,7 @@ Instruction *InstCombiner::commonDivTransforms(BinaryOperator &I) {    if (isa<UndefValue>(Op0)) {      if (Op0->getType()->isFPOrFPVector())        return ReplaceInstUsesWith(I, Op0); -    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));    }    // X / undef -> undef @@ -2962,12 +2985,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {    // (sdiv X, X) --> 1     (udiv X, X) --> 1    if (Op0 == Op1) {      if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) { -      Constant *CI = Context->getConstantInt(Ty->getElementType(), 1); +      Constant *CI = ConstantInt::get(Ty->getElementType(), 1);        std::vector<Constant*> Elts(Ty->getNumElements(), CI); -      return ReplaceInstUsesWith(I, Context->getConstantVector(Elts)); +      return ReplaceInstUsesWith(I, ConstantVector::get(Elts));      } -    Constant *CI = Context->getConstantInt(I.getType(), 1); +    Constant *CI = ConstantInt::get(I.getType(), 1);      return ReplaceInstUsesWith(I, CI);    } @@ -2989,11 +3012,11 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {        if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())          if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {            
if (MultiplyOverflows(RHS, LHSRHS, -                                I.getOpcode()==Instruction::SDiv, Context)) -            return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +                                I.getOpcode()==Instruction::SDiv)) +            return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));            else               return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), -                                      Context->getConstantExprMul(RHS, LHSRHS)); +                                      ConstantExpr::getMul(RHS, LHSRHS));          }      if (!RHS->isZero()) { // avoid X udiv 0 @@ -3009,10 +3032,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {    // 0 / X == 0, we don't need to preserve faults!    if (ConstantInt *LHS = dyn_cast<ConstantInt>(Op0))      if (LHS->equalsInt(0)) -      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));    // It can't be division by zero, hence it must be division by one. -  if (I.getType() == Type::Int1Ty) +  if (I.getType() == Type::getInt1Ty(*Context))      return ReplaceInstUsesWith(I, Op0);    if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { @@ -3038,14 +3061,13 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {      // if so, convert to a right shift.      
if (C->getValue().isPowerOf2())  // 0 not included in isPowerOf2        return BinaryOperator::CreateLShr(Op0,  -            Context->getConstantInt(Op0->getType(), C->getValue().logBase2())); +            ConstantInt::get(Op0->getType(), C->getValue().logBase2()));      // X udiv C, where C >= signbit      if (C->getValue().isNegative()) { -      Value *IC = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_ULT, Op0, C), -                                      I); -      return SelectInst::Create(IC, Context->getNullValue(I.getType()), -                                Context->getConstantInt(I.getType(), 1)); +      Value *IC = Builder->CreateICmpULT( Op0, C); +      return SelectInst::Create(IC, Constant::getNullValue(I.getType()), +                                ConstantInt::get(I.getType(), 1));      }    } @@ -3057,10 +3079,8 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {        if (C1.isPowerOf2()) {          Value *N = RHSI->getOperand(1);          const Type *NTy = N->getType(); -        if (uint32_t C2 = C1.logBase2()) { -          Constant *C2V = Context->getConstantInt(NTy, C2); -          N = InsertNewInstBefore(BinaryOperator::CreateAdd(N, C2V, "tmp"), I); -        } +        if (uint32_t C2 = C1.logBase2()) +          N = Builder->CreateAdd(N, ConstantInt::get(NTy, C2), "tmp");          return BinaryOperator::CreateLShr(Op0, N);        }      } @@ -3076,16 +3096,12 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {            // Compute the shift amounts            uint32_t TSA = TVA.logBase2(), FSA = FVA.logBase2();            // Construct the "on true" case of the select -          Constant *TC = Context->getConstantInt(Op0->getType(), TSA); -          Instruction *TSI = BinaryOperator::CreateLShr( -                                                 Op0, TC, SI->getName()+".t"); -          TSI = InsertNewInstBefore(TSI, I); +          Constant *TC = ConstantInt::get(Op0->getType(), TSA); +          Value *TSI = 
Builder->CreateLShr(Op0, TC, SI->getName()+".t");            // Construct the "on false" case of the select -          Constant *FC = Context->getConstantInt(Op0->getType(), FSA);  -          Instruction *FSI = BinaryOperator::CreateLShr( -                                                 Op0, FC, SI->getName()+".f"); -          FSI = InsertNewInstBefore(FSI, I); +          Constant *FC = ConstantInt::get(Op0->getType(), FSA);  +          Value *FSI = Builder->CreateLShr(Op0, FC, SI->getName()+".f");            // construct the select instruction and return it.            return SelectInst::Create(SI->getOperand(0), TSI, FSI, SI->getName()); @@ -3105,17 +3121,45 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {      // sdiv X, -1 == -X      if (RHS->isAllOnesValue())        return BinaryOperator::CreateNeg(Op0); + +    // sdiv X, C  -->  ashr X, log2(C) +    if (cast<SDivOperator>(&I)->isExact() && +        RHS->getValue().isNonNegative() && +        RHS->getValue().isPowerOf2()) { +      Value *ShAmt = llvm::ConstantInt::get(RHS->getType(), +                                            RHS->getValue().exactLogBase2()); +      return BinaryOperator::CreateAShr(Op0, ShAmt, I.getName()); +    } + +    // -X/C  -->  X/-C  provided the negation doesn't overflow. +    if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) +      if (isa<Constant>(Sub->getOperand(0)) && +          cast<Constant>(Sub->getOperand(0))->isNullValue() && +          Sub->hasNoSignedWrap()) +        return BinaryOperator::CreateSDiv(Sub->getOperand(1), +                                          ConstantExpr::getNeg(RHS));    }    // If the sign bits of both operands are zero (i.e. we can prove they are    // unsigned inputs), turn this into a udiv.    
if (I.getType()->isInteger()) {      APInt Mask(APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())); -    if (MaskedValueIsZero(Op1, Mask) && MaskedValueIsZero(Op0, Mask)) { -      // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set -      return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); +    if (MaskedValueIsZero(Op0, Mask)) { +      if (MaskedValueIsZero(Op1, Mask)) { +        // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set +        return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); +      } +      ConstantInt *ShiftedInt; +      if (match(Op1, m_Shl(m_ConstantInt(ShiftedInt), m_Value())) && +          ShiftedInt->getValue().isPowerOf2()) { +        // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) +        // Safe because the only negative value (1 << Y) can take on is +        // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have +        // the sign bit set. +        return BinaryOperator::CreateUDiv(Op0, Op1, I.getName()); +      }      } -  }       +  }    return 0;  } @@ -3134,7 +3178,7 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) {    if (isa<UndefValue>(Op0)) {             // undef % X -> 0      if (I.getType()->isFPOrFPVector())        return ReplaceInstUsesWith(I, Op0);  // X % undef -> undef (could be SNaN) -    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));    }    if (isa<UndefValue>(Op1))      return ReplaceInstUsesWith(I, Op1);  // X % undef -> undef @@ -3159,15 +3203,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {    // 0 % X == 0 for integer, we don't need to preserve faults!    
if (Constant *LHS = dyn_cast<Constant>(Op0))      if (LHS->isNullValue()) -      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {      // X % 0 == undef, we don't need to preserve faults!      if (RHS->equalsInt(0)) -      return ReplaceInstUsesWith(I, Context->getUndef(I.getType())); +      return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));      if (RHS->equalsInt(1))  // X % 1 == 0 -      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));      if (Instruction *Op0I = dyn_cast<Instruction>(Op0)) {        if (SelectInst *SI = dyn_cast<SelectInst>(Op0I)) { @@ -3199,7 +3243,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {      // if so, convert to a bitwise and.      if (ConstantInt *C = dyn_cast<ConstantInt>(RHS))        if (C->getValue().isPowerOf2()) -        return BinaryOperator::CreateAnd(Op0, SubOne(C, Context)); +        return BinaryOperator::CreateAnd(Op0, SubOne(C));    }    if (Instruction *RHSI = dyn_cast<Instruction>(I.getOperand(1))) { @@ -3207,9 +3251,8 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {      if (RHSI->getOpcode() == Instruction::Shl &&          isa<ConstantInt>(RHSI->getOperand(0))) {        if (cast<ConstantInt>(RHSI->getOperand(0))->getValue().isPowerOf2()) { -        Constant *N1 = Context->getConstantIntAllOnesValue(I.getType()); -        Value *Add = InsertNewInstBefore(BinaryOperator::CreateAdd(RHSI, N1, -                                                                   "tmp"), I); +        Constant *N1 = Constant::getAllOnesValue(I.getType()); +        Value *Add = Builder->CreateAdd(RHSI, N1, "tmp");          return BinaryOperator::CreateAnd(Op0, Add);        }      } @@ -3223,12 +3266,10 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {          
// STO == 0 and SFO == 0 handled above.          if ((STO->getValue().isPowerOf2()) &&               (SFO->getValue().isPowerOf2())) { -          Value *TrueAnd = InsertNewInstBefore( -            BinaryOperator::CreateAnd(Op0, SubOne(STO, Context), -                                      SI->getName()+".t"), I); -          Value *FalseAnd = InsertNewInstBefore( -            BinaryOperator::CreateAnd(Op0, SubOne(SFO, Context), -                                      SI->getName()+".f"), I); +          Value *TrueAnd = Builder->CreateAnd(Op0, SubOne(STO), +                                              SI->getName()+".t"); +          Value *FalseAnd = Builder->CreateAnd(Op0, SubOne(SFO), +                                               SI->getName()+".f");            return SelectInst::Create(SI->getOperand(0), TrueAnd, FalseAnd);          }        } @@ -3241,15 +3282,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);    // Handle the integer rem common cases -  if (Instruction *common = commonIRemTransforms(I)) -    return common; +  if (Instruction *Common = commonIRemTransforms(I)) +    return Common; -  if (Value *RHSNeg = dyn_castNegVal(Op1, Context)) +  if (Value *RHSNeg = dyn_castNegVal(Op1))      if (!isa<Constant>(RHSNeg) ||          (isa<ConstantInt>(RHSNeg) &&           cast<ConstantInt>(RHSNeg)->getValue().isStrictlyPositive())) {        // X % -Y -> X % Y -      AddUsesToWorkList(I); +      Worklist.AddValue(I.getOperand(1));        I.setOperand(1, RHSNeg);        return &I;      } @@ -3279,15 +3320,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {        for (unsigned i = 0; i != VWidth; ++i) {          if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {            if (RHS->getValue().isNegative()) -            Elts[i] = cast<ConstantInt>(Context->getConstantExprNeg(RHS)); +            Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));            else  
            Elts[i] = RHS;          }        } -      Constant *NewRHSV = Context->getConstantVector(Elts); +      Constant *NewRHSV = ConstantVector::get(Elts);        if (NewRHSV != RHSV) { -        AddUsesToWorkList(I); +        Worklist.AddValue(I.getOperand(1));          I.setOperand(1, NewRHSV);          return &I;        } @@ -3351,7 +3392,7 @@ static unsigned getICmpCode(const ICmpInst *ICI) {    case ICmpInst::ICMP_SLE: return 6;  // 110      // True -> 7    default: -    assert(0 && "Invalid ICmp predicate!"); +    llvm_unreachable("Invalid ICmp predicate!");      return 0;    }  } @@ -3379,7 +3420,7 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {      // True -> 7    default:      // Not expecting FCMP_FALSE and FCMP_TRUE; -    assert(0 && "Unexpected FCmp predicate!"); +    llvm_unreachable("Unexpected FCmp predicate!");      return 0;    }  } @@ -3389,10 +3430,10 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {  /// new ICmp instruction. The sign is passed in to determine which kind  /// of predicate to use in the new icmp instruction.  
static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS, -                           LLVMContext* Context) { +                           LLVMContext *Context) {    switch (code) { -  default: assert(0 && "Illegal ICmp code!"); -  case  0: return Context->getConstantIntFalse(); +  default: llvm_unreachable("Illegal ICmp code!"); +  case  0: return ConstantInt::getFalse(*Context);    case  1:       if (sign)        return new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS); @@ -3415,7 +3456,7 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,        return new ICmpInst(ICmpInst::ICMP_SLE, LHS, RHS);      else        return new ICmpInst(ICmpInst::ICMP_ULE, LHS, RHS); -  case  7: return Context->getConstantIntTrue(); +  case  7: return ConstantInt::getTrue(*Context);    }  } @@ -3423,9 +3464,9 @@ static Value *getICmpValue(bool sign, unsigned code, Value *LHS, Value *RHS,  /// opcode and two operands into either a FCmp instruction. isordered is passed  /// in to determine which kind of predicate to use in the new fcmp instruction.  
static Value *getFCmpValue(bool isordered, unsigned code, -                           Value *LHS, Value *RHS, LLVMContext* Context) { +                           Value *LHS, Value *RHS, LLVMContext *Context) {    switch (code) { -  default: assert(0 && "Illegal FCmp code!"); +  default: llvm_unreachable("Illegal FCmp code!");    case  0:      if (isordered)        return new FCmpInst(FCmpInst::FCMP_ORD, LHS, RHS); @@ -3461,7 +3502,7 @@ static Value *getFCmpValue(bool isordered, unsigned code,        return new FCmpInst(FCmpInst::FCMP_OLE, LHS, RHS);      else        return new FCmpInst(FCmpInst::FCMP_ULE, LHS, RHS); -  case  7: return Context->getConstantIntTrue(); +  case  7: return ConstantInt::getTrue(*Context);    }  } @@ -3504,7 +3545,7 @@ struct FoldICmpLogical {      case Instruction::And: Code = LHSCode & RHSCode; break;      case Instruction::Or:  Code = LHSCode | RHSCode; break;      case Instruction::Xor: Code = LHSCode ^ RHSCode; break; -    default: assert(0 && "Illegal logical opcode!"); return 0; +    default: llvm_unreachable("Illegal logical opcode!"); return 0;      }      bool isSigned = ICmpInst::isSignedPredicate(RHSICI->getPredicate()) ||  @@ -3529,14 +3570,13 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,    Value *X = Op->getOperand(0);    Constant *Together = 0;    if (!Op->isShift()) -    Together = Context->getConstantExprAnd(AndRHS, OpRHS); +    Together = ConstantExpr::getAnd(AndRHS, OpRHS);    switch (Op->getOpcode()) {    case Instruction::Xor:      if (Op->hasOneUse()) {        // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2) -      Instruction *And = BinaryOperator::CreateAnd(X, AndRHS); -      InsertNewInstBefore(And, TheAnd); +      Value *And = Builder->CreateAnd(X, AndRHS);        And->takeName(Op);        return BinaryOperator::CreateXor(And, Together);      } @@ -3547,8 +3587,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,      if (Op->hasOneUse() && Together != OpRHS) {        // (X | C1) & C2 --> (X | (C1&C2)) & C2 
-      Instruction *Or = BinaryOperator::CreateOr(X, Together); -      InsertNewInstBefore(Or, TheAnd); +      Value *Or = Builder->CreateOr(X, Together);        Or->takeName(Op);        return BinaryOperator::CreateAnd(Or, AndRHS);      } @@ -3578,8 +3617,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,              return &TheAnd;            } else {              // Pull the XOR out of the AND. -            Instruction *NewAnd = BinaryOperator::CreateAnd(X, AndRHS); -            InsertNewInstBefore(NewAnd, TheAnd); +            Value *NewAnd = Builder->CreateAnd(X, AndRHS);              NewAnd->takeName(Op);              return BinaryOperator::CreateXor(NewAnd, AndRHS);            } @@ -3595,7 +3633,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,      uint32_t BitWidth = AndRHS->getType()->getBitWidth();      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);      APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal)); -    ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShlMask); +    ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShlMask);      if (CI->getValue() == ShlMask) {       // Masking out bits that the shift already masks @@ -3615,7 +3653,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,      uint32_t BitWidth = AndRHS->getType()->getBitWidth();      uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);      APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); -    ConstantInt *CI = Context->getConstantInt(AndRHS->getValue() & ShrMask); +    ConstantInt *CI = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);      if (CI->getValue() == ShrMask) {         // Masking out bits that the shift already masks. 
@@ -3634,14 +3672,12 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,        uint32_t BitWidth = AndRHS->getType()->getBitWidth();        uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);        APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal)); -      Constant *C = Context->getConstantInt(AndRHS->getValue() & ShrMask); +      Constant *C = ConstantInt::get(*Context, AndRHS->getValue() & ShrMask);        if (C == AndRHS) {          // Masking out bits shifted in.          // (Val ashr C1) & C2 -> (Val lshr C1) & C2          // Make the argument unsigned.          Value *ShVal = Op->getOperand(0); -        ShVal = InsertNewInstBefore( -            BinaryOperator::CreateLShr(ShVal, OpRHS,  -                                   Op->getName()), TheAnd); +        ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());          return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());        }      } @@ -3659,7 +3695,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,  Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,                                             bool isSigned, bool Inside,                                              Instruction &IB) { -  assert(cast<ConstantInt>(Context->getConstantExprICmp((isSigned ?  +  assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?               
ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&           "Lo is not <= Hi in range emission code!"); @@ -3675,10 +3711,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,      }      // Emit V-Lo <u Hi-Lo -    Constant *NegLo = Context->getConstantExprNeg(Lo); -    Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off"); -    InsertNewInstBefore(Add, IB); -    Constant *UpperBound = Context->getConstantExprAdd(NegLo, Hi); +    Constant *NegLo = ConstantExpr::getNeg(Lo); +    Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); +    Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);      return new ICmpInst(ICmpInst::ICMP_ULT, Add, UpperBound);    } @@ -3686,7 +3721,7 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,      return new ICmpInst(ICmpInst::ICMP_EQ, V, V);    // V < Min || V >= Hi -> V > Hi-1 -  Hi = SubOne(cast<ConstantInt>(Hi), Context); +  Hi = SubOne(cast<ConstantInt>(Hi));    if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {      ICmpInst::Predicate pred = (isSigned ?           ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT); @@ -3695,10 +3730,9 @@ Instruction *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,    // Emit V-Lo >u Hi-1-Lo    // Note that Hi has already had one subtracted from it, above. 
-  ConstantInt *NegLo = cast<ConstantInt>(Context->getConstantExprNeg(Lo)); -  Instruction *Add = BinaryOperator::CreateAdd(V, NegLo, V->getName()+".off"); -  InsertNewInstBefore(Add, IB); -  Constant *LowerBound = Context->getConstantExprAdd(NegLo, Hi); +  ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo)); +  Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off"); +  Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);    return new ICmpInst(ICmpInst::ICMP_UGT, Add, LowerBound);  } @@ -3740,7 +3774,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,    switch (LHSI->getOpcode()) {    default: return 0;    case Instruction::And: -    if (Context->getConstantExprAnd(N, Mask) == Mask) { +    if (ConstantExpr::getAnd(N, Mask) == Mask) {        // If the AndRHS is a power of two minus one (0+1+), this is simple.        if ((Mask->getValue().countLeadingZeros() +              Mask->getValue().countPopulation()) ==  @@ -3764,17 +3798,14 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,      // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0      if ((Mask->getValue().countLeadingZeros() +            Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() -        && Context->getConstantExprAnd(N, Mask)->isNullValue()) +        && ConstantExpr::getAnd(N, Mask)->isNullValue())        break;      return 0;    } -  Instruction *New;    if (isSub) -    New = BinaryOperator::CreateSub(LHSI->getOperand(0), RHS, "fold"); -  else -    New = BinaryOperator::CreateAdd(LHSI->getOperand(0), RHS, "fold"); -  return InsertNewInstBefore(New, I); +    return Builder->CreateSub(LHSI->getOperand(0), RHS, "fold"); +  return Builder->CreateAdd(LHSI->getOperand(0), RHS, "fold");  }  /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. 
@@ -3785,16 +3816,17 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,    ICmpInst::Predicate LHSCC, RHSCC;    // This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2). -  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || -      !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) +  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), +                         m_ConstantInt(LHSCst))) || +      !match(RHS, m_ICmp(RHSCC, m_Value(Val2), +                         m_ConstantInt(RHSCst))))      return 0;    // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)    // where C is a power of 2    if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT &&        LHSCst->getValue().isPowerOf2()) { -    Instruction *NewOr = BinaryOperator::CreateOr(Val, Val2); -    InsertNewInstBefore(NewOr, I); +    Value *NewOr = Builder->CreateOr(Val, Val2);      return new ICmpInst(LHSCC, NewOr, LHSCst);    } @@ -3837,14 +3869,14 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,    assert(LHSCst != RHSCst && "Compares not folded above?");    switch (LHSCC) { -  default: assert(0 && "Unknown integer condition code!"); +  default: llvm_unreachable("Unknown integer condition code!");    case ICmpInst::ICMP_EQ:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ:         // (X == 13 & X == 15) -> false      case ICmpInst::ICMP_UGT:        // (X == 13 & X >  15) -> false      case ICmpInst::ICMP_SGT:        // (X == 13 & X >  15) -> false -      return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));      case ICmpInst::ICMP_NE:         // (X == 13 & X != 15) -> X == 13      case ICmpInst::ICMP_ULT:        // (X == 13 & X <  15) -> X == 13      case ICmpInst::ICMP_SLT:        // (X == 13 & X <  15) -> X == 13 @@ 
-3852,13 +3884,13 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,      }    case ICmpInst::ICMP_NE:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_ULT: -      if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X u< 14) -> X < 13 +      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13          return new ICmpInst(ICmpInst::ICMP_ULT, Val, LHSCst);        break;                        // (X != 13 & X u< 15) -> no change      case ICmpInst::ICMP_SLT: -      if (LHSCst == SubOne(RHSCst, Context)) // (X != 13 & X s< 14) -> X < 13 +      if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13          return new ICmpInst(ICmpInst::ICMP_SLT, Val, LHSCst);        break;                        // (X != 13 & X s< 15) -> no change      case ICmpInst::ICMP_EQ:         // (X != 13 & X == 15) -> X == 15 @@ -3866,23 +3898,21 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,      case ICmpInst::ICMP_SGT:        // (X != 13 & X s> 15) -> X s> 15        return ReplaceInstUsesWith(I, RHS);      case ICmpInst::ICMP_NE: -      if (LHSCst == SubOne(RHSCst, Context)){// (X != 13 & X != 14) -> X-13 >u 1 -        Constant *AddCST = Context->getConstantExprNeg(LHSCst); -        Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST, -                                                     Val->getName()+".off"); -        InsertNewInstBefore(Add, I); +      if (LHSCst == SubOne(RHSCst)){// (X != 13 & X != 14) -> X-13 >u 1 +        Constant *AddCST = ConstantExpr::getNeg(LHSCst); +        Value *Add = Builder->CreateAdd(Val, AddCST, Val->getName()+".off");          return new ICmpInst(ICmpInst::ICMP_UGT, Add, -                            Context->getConstantInt(Add->getType(), 1)); +                            ConstantInt::get(Add->getType(), 1));        }        break;                        // (X != 13 & X != 15) -> no 
change      }      break;    case ICmpInst::ICMP_ULT:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ:         // (X u< 13 & X == 15) -> false      case ICmpInst::ICMP_UGT:        // (X u< 13 & X u> 15) -> false -      return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));      case ICmpInst::ICMP_SGT:        // (X u< 13 & X s> 15) -> no change        break;      case ICmpInst::ICMP_NE:         // (X u< 13 & X != 15) -> X u< 13 @@ -3894,10 +3924,10 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,      break;    case ICmpInst::ICMP_SLT:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ:         // (X s< 13 & X == 15) -> false      case ICmpInst::ICMP_SGT:        // (X s< 13 & X s> 15) -> false -      return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));      case ICmpInst::ICMP_UGT:        // (X s< 13 & X u> 15) -> no change        break;      case ICmpInst::ICMP_NE:         // (X s< 13 & X != 15) -> X < 13 @@ -3909,18 +3939,18 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,      break;    case ICmpInst::ICMP_UGT:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ:         // (X u> 13 & X == 15) -> X == 15      case ICmpInst::ICMP_UGT:        // (X u> 13 & X u> 15) -> X u> 15        return ReplaceInstUsesWith(I, RHS);      case ICmpInst::ICMP_SGT:        // (X u> 13 & X s> 15) -> no change        break;      case ICmpInst::ICMP_NE: -      if (RHSCst == AddOne(LHSCst, Context)) // (X u> 13 & X != 14) -> X 
u> 14 +      if (RHSCst == AddOne(LHSCst)) // (X u> 13 & X != 14) -> X u> 14          return new ICmpInst(LHSCC, Val, RHSCst);        break;                        // (X u> 13 & X != 15) -> no change      case ICmpInst::ICMP_ULT:        // (X u> 13 & X u< 15) -> (X-14) <u 1 -      return InsertRangeTest(Val, AddOne(LHSCst, Context), +      return InsertRangeTest(Val, AddOne(LHSCst),                               RHSCst, false, true, I);      case ICmpInst::ICMP_SLT:        // (X u> 13 & X s< 15) -> no change        break; @@ -3928,18 +3958,18 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,      break;    case ICmpInst::ICMP_SGT:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ:         // (X s> 13 & X == 15) -> X == 15      case ICmpInst::ICMP_SGT:        // (X s> 13 & X s> 15) -> X s> 15        return ReplaceInstUsesWith(I, RHS);      case ICmpInst::ICMP_UGT:        // (X s> 13 & X u> 15) -> no change        break;      case ICmpInst::ICMP_NE: -      if (RHSCst == AddOne(LHSCst, Context)) // (X s> 13 & X != 14) -> X s> 14 +      if (RHSCst == AddOne(LHSCst)) // (X s> 13 & X != 14) -> X s> 14          return new ICmpInst(LHSCC, Val, RHSCst);        break;                        // (X s> 13 & X != 15) -> no change      case ICmpInst::ICMP_SLT:        // (X s> 13 & X s< 15) -> (X-14) s< 1 -      return InsertRangeTest(Val, AddOne(LHSCst, Context), +      return InsertRangeTest(Val, AddOne(LHSCst),                               RHSCst, true, true, I);      case ICmpInst::ICMP_ULT:        // (X s> 13 & X u< 15) -> no change        break; @@ -3950,13 +3980,89 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I,    return 0;  } +Instruction *InstCombiner::FoldAndOfFCmps(Instruction &I, FCmpInst *LHS, +                                          FCmpInst *RHS) { +   +  if (LHS->getPredicate() == FCmpInst::FCMP_ORD && +      
RHS->getPredicate() == FCmpInst::FCMP_ORD) { +    // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y) +    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) +      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { +        // If either of the constants are nans, then the whole thing returns +        // false. +        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) +          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); +        return new FCmpInst(FCmpInst::FCMP_ORD, +                            LHS->getOperand(0), RHS->getOperand(0)); +      } +     +    // Handle vector zeros.  This occurs because the canonical form of +    // "fcmp ord x,x" is "fcmp ord x, 0". +    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && +        isa<ConstantAggregateZero>(RHS->getOperand(1))) +      return new FCmpInst(FCmpInst::FCMP_ORD, +                          LHS->getOperand(0), RHS->getOperand(0)); +    return 0; +  } +   +  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); +  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); +  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); +   +   +  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { +    // Swap RHS operands to match LHS. +    Op1CC = FCmpInst::getSwappedPredicate(Op1CC); +    std::swap(Op1LHS, Op1RHS); +  } +   +  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { +    // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 
+    if (Op0CC == Op1CC) +      return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); +     +    if (Op0CC == FCmpInst::FCMP_FALSE || Op1CC == FCmpInst::FCMP_FALSE) +      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); +    if (Op0CC == FCmpInst::FCMP_TRUE) +      return ReplaceInstUsesWith(I, RHS); +    if (Op1CC == FCmpInst::FCMP_TRUE) +      return ReplaceInstUsesWith(I, LHS); +     +    bool Op0Ordered; +    bool Op1Ordered; +    unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); +    unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); +    if (Op1Pred == 0) { +      std::swap(LHS, RHS); +      std::swap(Op0Pred, Op1Pred); +      std::swap(Op0Ordered, Op1Ordered); +    } +    if (Op0Pred == 0) { +      // uno && ueq -> uno && (uno || eq) -> ueq +      // ord && olt -> ord && (ord && lt) -> olt +      if (Op0Ordered == Op1Ordered) +        return ReplaceInstUsesWith(I, RHS); +       +      // uno && oeq -> uno && (ord && eq) -> false +      // uno && ord -> false +      if (!Op0Ordered) +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context)); +      // ord && ueq -> ord && (uno || eq) -> oeq +      return cast<Instruction>(getFCmpValue(true, Op1Pred, +                                            Op0LHS, Op0RHS, Context)); +    } +  } + +  return 0; +} +  Instruction *InstCombiner::visitAnd(BinaryOperator &I) {    bool Changed = SimplifyCommutative(I);    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);    if (isa<UndefValue>(Op1))                         // X & undef -> 0 -    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));    // and X, X = X    if (Op0 == Op1) @@ -3976,36 +4082,32 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {    }    if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { -    const APInt& AndRHSMask = AndRHS->getValue(); +    const APInt &AndRHSMask = AndRHS->getValue();      APInt 
NotAndRHS(~AndRHSMask);      // Optimize a variety of ((val OP C1) & C2) combinations... -    if (isa<BinaryOperator>(Op0)) { -      Instruction *Op0I = cast<Instruction>(Op0); +    if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {        Value *Op0LHS = Op0I->getOperand(0);        Value *Op0RHS = Op0I->getOperand(1);        switch (Op0I->getOpcode()) { +      default: break;        case Instruction::Xor:        case Instruction::Or:          // If the mask is only needed on one incoming arm, push it up. -        if (Op0I->hasOneUse()) { -          if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { -            // Not masking anything out for the LHS, move to RHS. -            Instruction *NewRHS = BinaryOperator::CreateAnd(Op0RHS, AndRHS, -                                                   Op0RHS->getName()+".masked"); -            InsertNewInstBefore(NewRHS, I); -            return BinaryOperator::Create( -                       cast<BinaryOperator>(Op0I)->getOpcode(), Op0LHS, NewRHS); -          } -          if (!isa<Constant>(Op0RHS) && -              MaskedValueIsZero(Op0RHS, NotAndRHS)) { -            // Not masking anything out for the RHS, move to LHS. -            Instruction *NewLHS = BinaryOperator::CreateAnd(Op0LHS, AndRHS, -                                                   Op0LHS->getName()+".masked"); -            InsertNewInstBefore(NewLHS, I); -            return BinaryOperator::Create( -                       cast<BinaryOperator>(Op0I)->getOpcode(), NewLHS, Op0RHS); -          } +        if (!Op0I->hasOneUse()) break; +           +        if (MaskedValueIsZero(Op0LHS, NotAndRHS)) { +          // Not masking anything out for the LHS, move to RHS. 
+          Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, +                                             Op0RHS->getName()+".masked"); +          return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); +        } +        if (!isa<Constant>(Op0RHS) && +            MaskedValueIsZero(Op0RHS, NotAndRHS)) { +          // Not masking anything out for the RHS, move to LHS. +          Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, +                                             Op0LHS->getName()+".masked"); +          return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS);          }          break; @@ -4036,8 +4138,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {            ConstantInt *A = dyn_cast<ConstantInt>(Op0LHS);            if (!(A && A->isZero()) &&               // avoid infinite recursion.                MaskedValueIsZero(Op0LHS, Mask)) { -            Instruction *NewNeg = BinaryOperator::CreateNeg(Op0RHS); -            InsertNewInstBefore(NewNeg, I); +            Value *NewNeg = Builder->CreateNeg(Op0RHS);              return BinaryOperator::CreateAnd(NewNeg, AndRHS);            }          } @@ -4048,9 +4149,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {          // (1 << x) & 1 --> zext(x == 0)          // (1 >> x) & 1 --> zext(x == 0)          if (AndRHSMask == 1 && Op0LHS == AndRHS) { -          Instruction *NewICmp = new ICmpInst(ICmpInst::ICMP_EQ, Op0RHS, -                                           Context->getNullValue(I.getType())); -          InsertNewInstBefore(NewICmp, I); +          Value *NewICmp = +            Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType()));            return new ZExtInst(NewICmp, I.getType());          }          break; @@ -4072,21 +4172,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {                // into  : and (cast X to T), trunc_or_bitcast(C1)&C2                // This will fold the two constants together, which may allow                 
// other simplifications. -              Instruction *NewCast = CastInst::CreateTruncOrBitCast( +              Value *NewCast = Builder->CreateTruncOrBitCast(                  CastOp->getOperand(0), I.getType(),                   CastOp->getName()+".shrunk"); -              NewCast = InsertNewInstBefore(NewCast, I);                // trunc_or_bitcast(C1)&C2 -              Constant *C3 = -                      Context->getConstantExprTruncOrBitCast(AndCI,I.getType()); -              C3 = Context->getConstantExprAnd(C3, AndRHS); +              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); +              C3 = ConstantExpr::getAnd(C3, AndRHS);                return BinaryOperator::CreateAnd(NewCast, C3);              } else if (CastOp->getOpcode() == Instruction::Or) {                // Change: and (cast (or X, C1) to T), C2                // into  : trunc(C1)&C2 iff trunc(C1)&C2 == C2 -              Constant *C3 = -                      Context->getConstantExprTruncOrBitCast(AndCI,I.getType()); -              if (Context->getConstantExprAnd(C3, AndRHS) == AndRHS) +              Constant *C3 = ConstantExpr::getTruncOrBitCast(AndCI,I.getType()); +              if (ConstantExpr::getAnd(C3, AndRHS) == AndRHS)                  // trunc(C1)&C2                  return ReplaceInstUsesWith(I, AndRHS);              } @@ -4103,17 +4200,16 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {          return NV;    } -  Value *Op0NotVal = dyn_castNotVal(Op0, Context); -  Value *Op1NotVal = dyn_castNotVal(Op1, Context); +  Value *Op0NotVal = dyn_castNotVal(Op0); +  Value *Op1NotVal = dyn_castNotVal(Op1);    if (Op0NotVal == Op1 || Op1NotVal == Op0)  // A & ~A  == ~A & A == 0 -    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));    // (~A & ~B) == (~(A | B)) - De Morgan's Law    if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) { -    Instruction *Or = 
BinaryOperator::CreateOr(Op0NotVal, Op1NotVal, -                                               I.getName()+".demorgan"); -    InsertNewInstBefore(Or, I); +    Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal, +                                  I.getName()+".demorgan");      return BinaryOperator::CreateNot(Or);    } @@ -4159,11 +4255,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {          cast<BinaryOperator>(Op1)->swapOperands();          std::swap(A, B);        } -      if (A == Op0) {                                // A&(A^B) -> A & ~B -        Instruction *NotB = BinaryOperator::CreateNot(B, "tmp"); -        InsertNewInstBefore(NotB, I); -        return BinaryOperator::CreateAnd(A, NotB); -      } +      if (A == Op0)                                // A&(A^B) -> A & ~B +        return BinaryOperator::CreateAnd(A, Builder->CreateNot(B, "tmp"));      }      // (A&((~A)|B)) -> A&B @@ -4177,7 +4270,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {    if (ICmpInst *RHS = dyn_cast<ICmpInst>(Op1)) {      // (icmp1 A, B) & (icmp2 A, B) --> (icmp3 A, B) -    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context)) +    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))        return R;      if (ICmpInst *LHS = dyn_cast<ICmpInst>(Op0)) @@ -4190,16 +4283,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {      if (CastInst *Op1C = dyn_cast<CastInst>(Op1))        if (Op0C->getOpcode() == Op1C->getOpcode()) { // same cast kind ?          const Type *SrcTy = Op0C->getOperand(0)->getType(); -        if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && +        if (SrcTy == Op1C->getOperand(0)->getType() && +            SrcTy->isIntOrIntVector() &&              // Only do this if the casts both really cause code to be generated.              
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),                                 I.getType(), TD) &&              ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),                                 I.getType(), TD)) { -          Instruction *NewOp = BinaryOperator::CreateAnd(Op0C->getOperand(0), -                                                         Op1C->getOperand(0), -                                                         I.getName()); -          InsertNewInstBefore(NewOp, I); +          Value *NewOp = Builder->CreateAnd(Op0C->getOperand(0), +                                            Op1C->getOperand(0), I.getName());            return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());          }        } @@ -4210,10 +4302,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {        if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&             SI0->getOperand(1) == SI1->getOperand(1) &&            (SI0->hasOneUse() || SI1->hasOneUse())) { -        Instruction *NewOp = -          InsertNewInstBefore(BinaryOperator::CreateAnd(SI0->getOperand(0), -                                                        SI1->getOperand(0), -                                                        SI0->getName()), I); +        Value *NewOp = +          Builder->CreateAnd(SI0->getOperand(0), SI1->getOperand(0), +                             SI0->getName());          return BinaryOperator::Create(SI1->getOpcode(), NewOp,                                         SI1->getOperand(1));        } @@ -4221,66 +4312,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {    // If and'ing two fcmp, try combine them into one.    
if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { -    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) { -      if (LHS->getPredicate() == FCmpInst::FCMP_ORD && -          RHS->getPredicate() == FCmpInst::FCMP_ORD) { -        // (fcmp ord x, c) & (fcmp ord y, c)  -> (fcmp ord x, y) -        if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) -          if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { -            // If either of the constants are nans, then the whole thing returns -            // false. -            if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) -              return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); -            return new FCmpInst(FCmpInst::FCMP_ORD, LHS->getOperand(0), -                                RHS->getOperand(0)); -          } -      } else { -        Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS; -        FCmpInst::Predicate Op0CC, Op1CC; -        if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) && -            match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) { -          if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { -            // Swap RHS operands to match LHS. -            Op1CC = FCmpInst::getSwappedPredicate(Op1CC); -            std::swap(Op1LHS, Op1RHS); -          } -          if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { -            // Simplify (fcmp cc0 x, y) & (fcmp cc1 x, y). 
-            if (Op0CC == Op1CC) -              return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); -            else if (Op0CC == FCmpInst::FCMP_FALSE || -                     Op1CC == FCmpInst::FCMP_FALSE) -              return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); -            else if (Op0CC == FCmpInst::FCMP_TRUE) -              return ReplaceInstUsesWith(I, Op1); -            else if (Op1CC == FCmpInst::FCMP_TRUE) -              return ReplaceInstUsesWith(I, Op0); -            bool Op0Ordered; -            bool Op1Ordered; -            unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); -            unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); -            if (Op1Pred == 0) { -              std::swap(Op0, Op1); -              std::swap(Op0Pred, Op1Pred); -              std::swap(Op0Ordered, Op1Ordered); -            } -            if (Op0Pred == 0) { -              // uno && ueq -> uno && (uno || eq) -> ueq -              // ord && olt -> ord && (ord && lt) -> olt -              if (Op0Ordered == Op1Ordered) -                return ReplaceInstUsesWith(I, Op1); -              // uno && oeq -> uno && (ord && eq) -> false -              // uno && ord -> false -              if (!Op0Ordered) -                return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); -              // ord && ueq -> ord && (uno || eq) -> oeq -              return cast<Instruction>(getFCmpValue(true, Op1Pred, -                                                    Op0LHS, Op0RHS, Context)); -            } -          } -        } -      } -    } +    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) +      if (Instruction *Res = FoldAndOfFCmps(I, LHS, RHS)) +        return Res;    }    return Changed ? &I : 0; @@ -4450,7 +4484,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {  /// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then  /// we can simplify this expression to "cond ? C : D or B".  
static Instruction *MatchSelectFromAndOr(Value *A, Value *B, -                                         Value *C, Value *D) { +                                         Value *C, Value *D, +                                         LLVMContext *Context) {    // If A is not a select of -1/0, this cannot match.    Value *Cond = 0;    if (!match(A, m_SelectCst<-1, 0>(m_Value(Cond)))) @@ -4477,8 +4512,10 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,    ICmpInst::Predicate LHSCC, RHSCC;    // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). -  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || -      !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) +  if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), +             m_ConstantInt(LHSCst))) || +      !match(RHS, m_ICmp(RHSCC, m_Value(Val2), +             m_ConstantInt(RHSCst))))      return 0;    // From here on, we only handle: @@ -4520,18 +4557,16 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,    assert(LHSCst != RHSCst && "Compares not folded above?");    switch (LHSCC) { -  default: assert(0 && "Unknown integer condition code!"); +  default: llvm_unreachable("Unknown integer condition code!");    case ICmpInst::ICMP_EQ:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ: -      if (LHSCst == SubOne(RHSCst, Context)) { +      if (LHSCst == SubOne(RHSCst)) {          // (X == 13 | X == 14) -> X-13 <u 2 -        Constant *AddCST = Context->getConstantExprNeg(LHSCst); -        Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST, -                                                     Val->getName()+".off"); -        InsertNewInstBefore(Add, I); -        AddCST = Context->getConstantExprSub(AddOne(RHSCst, Context), LHSCst); +        Constant *AddCST = ConstantExpr::getNeg(LHSCst); +        Value *Add = 
Builder->CreateAdd(Val, AddCST, Val->getName()+".off"); +        AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);          return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);        }        break;                         // (X == 13 | X == 15) -> no change @@ -4546,7 +4581,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,      break;    case ICmpInst::ICMP_NE:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ:          // (X != 13 | X == 15) -> X != 13      case ICmpInst::ICMP_UGT:         // (X != 13 | X u> 15) -> X != 13      case ICmpInst::ICMP_SGT:         // (X != 13 | X s> 15) -> X != 13 @@ -4554,12 +4589,12 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,      case ICmpInst::ICMP_NE:          // (X != 13 | X != 15) -> true      case ICmpInst::ICMP_ULT:         // (X != 13 | X u< 15) -> true      case ICmpInst::ICMP_SLT:         // (X != 13 | X s< 15) -> true -      return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +      return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));      }      break;    case ICmpInst::ICMP_ULT:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ:         // (X u< 13 | X == 14) -> no change        break;      case ICmpInst::ICMP_UGT:        // (X u< 13 | X u> 15) -> (X-13) u> 2 @@ -4567,7 +4602,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,        // this can cause overflow.        
if (RHSCst->isMaxValue(false))          return ReplaceInstUsesWith(I, LHS); -      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context), +      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),                               false, false, I);      case ICmpInst::ICMP_SGT:        // (X u< 13 | X s> 15) -> no change        break; @@ -4580,7 +4615,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,      break;    case ICmpInst::ICMP_SLT:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ:         // (X s< 13 | X == 14) -> no change        break;      case ICmpInst::ICMP_SGT:        // (X s< 13 | X s> 15) -> (X-13) s> 2 @@ -4588,7 +4623,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,        // this can cause overflow.        if (RHSCst->isMaxValue(true))          return ReplaceInstUsesWith(I, LHS); -      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst, Context), +      return InsertRangeTest(Val, LHSCst, AddOne(RHSCst),                               true, false, I);      case ICmpInst::ICMP_UGT:        // (X s< 13 | X u> 15) -> no change        break; @@ -4601,7 +4636,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,      break;    case ICmpInst::ICMP_UGT:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ:         // (X u> 13 | X == 15) -> X u> 13      case ICmpInst::ICMP_UGT:        // (X u> 13 | X u> 15) -> X u> 13        return ReplaceInstUsesWith(I, LHS); @@ -4609,14 +4644,14 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,        break;      case ICmpInst::ICMP_NE:         // (X u> 13 | X != 15) -> true      case ICmpInst::ICMP_ULT:        // (X u> 13 | X u< 15) -> true -      return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +      return 
ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));      case ICmpInst::ICMP_SLT:        // (X u> 13 | X s< 15) -> no change        break;      }      break;    case ICmpInst::ICMP_SGT:      switch (RHSCC) { -    default: assert(0 && "Unknown integer condition code!"); +    default: llvm_unreachable("Unknown integer condition code!");      case ICmpInst::ICMP_EQ:         // (X s> 13 | X == 15) -> X > 13      case ICmpInst::ICMP_SGT:        // (X s> 13 | X s> 15) -> X > 13        return ReplaceInstUsesWith(I, LHS); @@ -4624,7 +4659,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,        break;      case ICmpInst::ICMP_NE:         // (X s> 13 | X != 15) -> true      case ICmpInst::ICMP_SLT:        // (X s> 13 | X s< 15) -> true -      return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +      return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));      case ICmpInst::ICMP_ULT:        // (X s> 13 | X u< 15) -> no change        break;      } @@ -4633,6 +4668,72 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,    return 0;  } +Instruction *InstCombiner::FoldOrOfFCmps(Instruction &I, FCmpInst *LHS, +                                         FCmpInst *RHS) { +  if (LHS->getPredicate() == FCmpInst::FCMP_UNO && +      RHS->getPredicate() == FCmpInst::FCMP_UNO &&  +      LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { +    if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) +      if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { +        // If either of the constants are nans, then the whole thing returns +        // true. +        if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) +          return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); +         +        // Otherwise, no need to compare the two constants, compare the +        // rest. 
+        return new FCmpInst(FCmpInst::FCMP_UNO, +                            LHS->getOperand(0), RHS->getOperand(0)); +      } +     +    // Handle vector zeros.  This occurs because the canonical form of +    // "fcmp uno x,x" is "fcmp uno x, 0". +    if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && +        isa<ConstantAggregateZero>(RHS->getOperand(1))) +      return new FCmpInst(FCmpInst::FCMP_UNO, +                          LHS->getOperand(0), RHS->getOperand(0)); +     +    return 0; +  } +   +  Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); +  Value *Op1LHS = RHS->getOperand(0), *Op1RHS = RHS->getOperand(1); +  FCmpInst::Predicate Op0CC = LHS->getPredicate(), Op1CC = RHS->getPredicate(); +   +  if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { +    // Swap RHS operands to match LHS. +    Op1CC = FCmpInst::getSwappedPredicate(Op1CC); +    std::swap(Op1LHS, Op1RHS); +  } +  if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { +    // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). +    if (Op0CC == Op1CC) +      return new FCmpInst((FCmpInst::Predicate)Op0CC, +                          Op0LHS, Op0RHS); +    if (Op0CC == FCmpInst::FCMP_TRUE || Op1CC == FCmpInst::FCMP_TRUE) +      return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); +    if (Op0CC == FCmpInst::FCMP_FALSE) +      return ReplaceInstUsesWith(I, RHS); +    if (Op1CC == FCmpInst::FCMP_FALSE) +      return ReplaceInstUsesWith(I, LHS); +    bool Op0Ordered; +    bool Op1Ordered; +    unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); +    unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); +    if (Op0Ordered == Op1Ordered) { +      // If both are ordered or unordered, return a new fcmp with +      // or'ed predicates. +      Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, +                               Op0LHS, Op0RHS, Context); +      if (Instruction *I = dyn_cast<Instruction>(RV)) +        return I; +      // Otherwise, it's a constant boolean value... 
+      return ReplaceInstUsesWith(I, RV); +    } +  } +  return 0; +} +  /// FoldOrWithConstants - This helper function folds:  ///  ///     ((A | B) & C1) | (B & C2) @@ -4655,8 +4756,7 @@ Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,    if (!Xor.isAllOnesValue()) return 0;    if (V1 == A || V1 == B) { -    Instruction *NewOp = -      InsertNewInstBefore(BinaryOperator::CreateAnd((V1 == A) ? B : A, CI1), I); +    Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);      return BinaryOperator::CreateOr(NewOp, V1);    } @@ -4668,7 +4768,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);    if (isa<UndefValue>(Op1))                       // X | undef -> -1 -    return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); +    return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));    // or X, X = X    if (Op0 == Op1) @@ -4691,21 +4791,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {      ConstantInt *C1 = 0; Value *X = 0;      // (X & C1) | C2 --> (X | C2) & (C1|C2) -    if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) { -      Instruction *Or = BinaryOperator::CreateOr(X, RHS); -      InsertNewInstBefore(Or, I); +    if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && +        isOnlyUse(Op0)) { +      Value *Or = Builder->CreateOr(X, RHS);        Or->takeName(Op0);        return BinaryOperator::CreateAnd(Or,  -               Context->getConstantInt(RHS->getValue() | C1->getValue())); +               ConstantInt::get(*Context, RHS->getValue() | C1->getValue()));      }      // (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2) -    if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && isOnlyUse(Op0)) { -      Instruction *Or = BinaryOperator::CreateOr(X, RHS); -      InsertNewInstBefore(Or, I); +    if (match(Op0, m_Xor(m_Value(X), m_ConstantInt(C1))) && +    
    isOnlyUse(Op0)) { +      Value *Or = Builder->CreateOr(X, RHS);        Or->takeName(Op0);        return BinaryOperator::CreateXor(Or, -                 Context->getConstantInt(C1->getValue() & ~RHS->getValue())); +                 ConstantInt::get(*Context, C1->getValue() & ~RHS->getValue()));      }      // Try to fold constant and into select arguments. @@ -4738,19 +4838,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {    }    // (X^C)|Y -> (X|Y)^C iff Y&C == 0 -  if (Op0->hasOneUse() && match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) && +  if (Op0->hasOneUse() && +      match(Op0, m_Xor(m_Value(A), m_ConstantInt(C1))) &&        MaskedValueIsZero(Op1, C1->getValue())) { -    Instruction *NOr = BinaryOperator::CreateOr(A, Op1); -    InsertNewInstBefore(NOr, I); +    Value *NOr = Builder->CreateOr(A, Op1);      NOr->takeName(Op0);      return BinaryOperator::CreateXor(NOr, C1);    }    // Y|(X^C) -> (X|Y)^C iff Y&C == 0 -  if (Op1->hasOneUse() && match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) && +  if (Op1->hasOneUse() && +      match(Op1, m_Xor(m_Value(A), m_ConstantInt(C1))) &&        MaskedValueIsZero(Op0, C1->getValue())) { -    Instruction *NOr = BinaryOperator::CreateOr(A, Op0); -    InsertNewInstBefore(NOr, I); +    Value *NOr = Builder->CreateOr(A, Op0);      NOr->takeName(Op0);      return BinaryOperator::CreateXor(NOr, C1);    } @@ -4801,20 +4901,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {          V1 = C, V2 = A, V3 = B;        if (V1) { -        Value *Or = -          InsertNewInstBefore(BinaryOperator::CreateOr(V2, V3, "tmp"), I); +        Value *Or = Builder->CreateOr(V2, V3, "tmp");          return BinaryOperator::CreateAnd(V1, Or);        }      }      // (A & (C0?-1:0)) | (B & ~(C0?-1:0)) ->  C0 ? 
A : B, and commuted variants -    if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D)) +    if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D, Context))        return Match; -    if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C)) +    if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C, Context))        return Match; -    if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D)) +    if (Instruction *Match = MatchSelectFromAndOr(C, B, A, D, Context))        return Match; -    if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C)) +    if (Instruction *Match = MatchSelectFromAndOr(D, A, B, C, Context))        return Match;      // ((A&~B)|(~A&B)) -> A^B @@ -4841,10 +4940,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {        if (SI0->isShift() && SI0->getOpcode() == SI1->getOpcode() &&             SI0->getOperand(1) == SI1->getOperand(1) &&            (SI0->hasOneUse() || SI1->hasOneUse())) { -        Instruction *NewOp = -        InsertNewInstBefore(BinaryOperator::CreateOr(SI0->getOperand(0), -                                                     SI1->getOperand(0), -                                                     SI0->getName()), I); +        Value *NewOp = Builder->CreateOr(SI0->getOperand(0), SI1->getOperand(0), +                                         SI0->getName());          return BinaryOperator::Create(SI1->getOpcode(), NewOp,                                         SI1->getOperand(1));        } @@ -4865,26 +4962,25 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {    if (match(Op0, m_Not(m_Value(A)))) {   // ~A | Op1      if (A == Op1)   // ~A | A == -1 -      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); +      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));    } else {      A = 0;    }    // Note, A is still live here!    
if (match(Op1, m_Not(m_Value(B)))) {   // Op0 | ~B      if (Op0 == B) -      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); +      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));      // (~A | ~B) == (~(A & B)) - De Morgan's Law      if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) { -      Value *And = InsertNewInstBefore(BinaryOperator::CreateAnd(A, B, -                                              I.getName()+".demorgan"), I); +      Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan");        return BinaryOperator::CreateNot(And);      }    }    // (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)    if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) { -    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context)) +    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))        return R;      if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) @@ -4899,17 +4995,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {          if (!isa<ICmpInst>(Op0C->getOperand(0)) ||              !isa<ICmpInst>(Op1C->getOperand(0))) {            const Type *SrcTy = Op0C->getOperand(0)->getType(); -          if (SrcTy == Op1C->getOperand(0)->getType() && SrcTy->isInteger() && +          if (SrcTy == Op1C->getOperand(0)->getType() && +              SrcTy->isIntOrIntVector() &&                // Only do this if the casts both really cause code to be                // generated.                
ValueRequiresCast(Op0C->getOpcode(), Op0C->getOperand(0),                                   I.getType(), TD) &&                ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),                                   I.getType(), TD)) { -            Instruction *NewOp = BinaryOperator::CreateOr(Op0C->getOperand(0), -                                                          Op1C->getOperand(0), -                                                          I.getName()); -            InsertNewInstBefore(NewOp, I); +            Value *NewOp = Builder->CreateOr(Op0C->getOperand(0), +                                             Op1C->getOperand(0), I.getName());              return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());            }          } @@ -4919,61 +5014,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {    // (fcmp uno x, c) | (fcmp uno y, c)  -> (fcmp uno x, y)    if (FCmpInst *LHS = dyn_cast<FCmpInst>(I.getOperand(0))) { -    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) { -      if (LHS->getPredicate() == FCmpInst::FCMP_UNO && -          RHS->getPredicate() == FCmpInst::FCMP_UNO &&  -          LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType()) { -        if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1))) -          if (ConstantFP *RHSC = dyn_cast<ConstantFP>(RHS->getOperand(1))) { -            // If either of the constants are nans, then the whole thing returns -            // true. -            if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) -              return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); -             -            // Otherwise, no need to compare the two constants, compare the -            // rest. 
-            return new FCmpInst(FCmpInst::FCMP_UNO, LHS->getOperand(0), -                                RHS->getOperand(0)); -          } -      } else { -        Value *Op0LHS, *Op0RHS, *Op1LHS, *Op1RHS; -        FCmpInst::Predicate Op0CC, Op1CC; -        if (match(Op0, m_FCmp(Op0CC, m_Value(Op0LHS), m_Value(Op0RHS))) && -            match(Op1, m_FCmp(Op1CC, m_Value(Op1LHS), m_Value(Op1RHS)))) { -          if (Op0LHS == Op1RHS && Op0RHS == Op1LHS) { -            // Swap RHS operands to match LHS. -            Op1CC = FCmpInst::getSwappedPredicate(Op1CC); -            std::swap(Op1LHS, Op1RHS); -          } -          if (Op0LHS == Op1LHS && Op0RHS == Op1RHS) { -            // Simplify (fcmp cc0 x, y) | (fcmp cc1 x, y). -            if (Op0CC == Op1CC) -              return new FCmpInst((FCmpInst::Predicate)Op0CC, Op0LHS, Op0RHS); -            else if (Op0CC == FCmpInst::FCMP_TRUE || -                     Op1CC == FCmpInst::FCMP_TRUE) -              return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); -            else if (Op0CC == FCmpInst::FCMP_FALSE) -              return ReplaceInstUsesWith(I, Op1); -            else if (Op1CC == FCmpInst::FCMP_FALSE) -              return ReplaceInstUsesWith(I, Op0); -            bool Op0Ordered; -            bool Op1Ordered; -            unsigned Op0Pred = getFCmpCode(Op0CC, Op0Ordered); -            unsigned Op1Pred = getFCmpCode(Op1CC, Op1Ordered); -            if (Op0Ordered == Op1Ordered) { -              // If both are ordered or unordered, return a new fcmp with -              // or'ed predicates. -              Value *RV = getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, -                                       Op0LHS, Op0RHS, Context); -              if (Instruction *I = dyn_cast<Instruction>(RV)) -                return I; -              // Otherwise, it's a constant boolean value... 
-              return ReplaceInstUsesWith(I, RV); -            } -          } -        } -      } -    } +    if (FCmpInst *RHS = dyn_cast<FCmpInst>(I.getOperand(1))) +      if (Instruction *Res = FoldOrOfFCmps(I, LHS, RHS)) +        return Res;    }    return Changed ? &I : 0; @@ -5001,14 +5044,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {      if (isa<UndefValue>(Op0))        // Handle undef ^ undef -> 0 special case. This is a common        // idiom (misuse). -      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));      return ReplaceInstUsesWith(I, Op1);  // X ^ undef -> undef    }    // xor X, X = 0, even if X is nested in a sequence of Xor's. -  if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1), Context)) { +  if (Instruction *Result = AssociativeOpt(I, XorSelf(Op1))) {      assert(Result == &I && "AssociativeOpt didn't work?"); Result=Result; -    return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +    return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));    }    // See if we can simplify any instructions used by the instruction whose sole  @@ -5020,22 +5063,20 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {        return ReplaceInstUsesWith(I, Op0);  // X ^ <0,0> -> X    // Is this a ~ operation? 
-  if (Value *NotOp = dyn_castNotVal(&I, Context)) { +  if (Value *NotOp = dyn_castNotVal(&I)) {      // ~(~X & Y) --> (X | ~Y) - De Morgan's Law      // ~(~X | Y) === (X & ~Y) - De Morgan's Law      if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {        if (Op0I->getOpcode() == Instruction::And ||             Op0I->getOpcode() == Instruction::Or) { -        if (dyn_castNotVal(Op0I->getOperand(1), Context)) Op0I->swapOperands(); -        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0), Context)) { -          Instruction *NotY = -            BinaryOperator::CreateNot(Op0I->getOperand(1), -                                      Op0I->getOperand(1)->getName()+".not"); -          InsertNewInstBefore(NotY, I); +        if (dyn_castNotVal(Op0I->getOperand(1))) Op0I->swapOperands(); +        if (Value *Op0NotVal = dyn_castNotVal(Op0I->getOperand(0))) { +          Value *NotY = +            Builder->CreateNot(Op0I->getOperand(1), +                               Op0I->getOperand(1)->getName()+".not");            if (Op0I->getOpcode() == Instruction::And)              return BinaryOperator::CreateOr(Op0NotVal, NotY); -          else -            return BinaryOperator::CreateAnd(Op0NotVal, NotY); +          return BinaryOperator::CreateAnd(Op0NotVal, NotY);          }        }      } @@ -5043,7 +5084,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {    if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) { -    if (RHS == Context->getConstantIntTrue() && Op0->hasOneUse()) { +    if (RHS->isOne() && Op0->hasOneUse()) {        // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B        if (ICmpInst *ICI = dyn_cast<ICmpInst>(Op0))          return new ICmpInst(ICI->getInversePredicate(), @@ -5059,16 +5100,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {        if (CmpInst *CI = dyn_cast<CmpInst>(Op0C->getOperand(0))) {          if (CI->hasOneUse() && Op0C->hasOneUse()) {            Instruction::CastOps Opcode = Op0C->getOpcode(); -   
       if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) { -            if (RHS == Context->getConstantExprCast(Opcode,  -                                             Context->getConstantIntTrue(), -                                             Op0C->getDestTy())) { -              Instruction *NewCI = InsertNewInstBefore(CmpInst::Create( -                                     CI->getOpcode(), CI->getInversePredicate(), -                                     CI->getOperand(0), CI->getOperand(1)), I); -              NewCI->takeName(CI); -              return CastInst::Create(Opcode, NewCI, Op0C->getType()); -            } +          if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && +              (RHS == ConstantExpr::getCast(Opcode,  +                                            ConstantInt::getTrue(*Context), +                                            Op0C->getDestTy()))) { +            CI->setPredicate(CI->getInversePredicate()); +            return CastInst::Create(Opcode, CI, Op0C->getType());            }          }        } @@ -5078,9 +5115,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {        // ~(c-X) == X-c-1 == X+(-c-1)        if (Op0I->getOpcode() == Instruction::Sub && RHS->isAllOnesValue())          if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) { -          Constant *NegOp0I0C = Context->getConstantExprNeg(Op0I0C); -          Constant *ConstantRHS = Context->getConstantExprSub(NegOp0I0C, -                                      Context->getConstantInt(I.getType(), 1)); +          Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); +          Constant *ConstantRHS = ConstantExpr::getSub(NegOp0I0C, +                                      ConstantInt::get(I.getType(), 1));            return BinaryOperator::CreateAdd(Op0I->getOperand(1), ConstantRHS);          } @@ -5088,28 +5125,28 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {          if (Op0I->getOpcode() == Instruction::Add) {     
       // ~(X-c) --> (-c-1)-X            if (RHS->isAllOnesValue()) { -            Constant *NegOp0CI = Context->getConstantExprNeg(Op0CI); +            Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI);              return BinaryOperator::CreateSub( -                           Context->getConstantExprSub(NegOp0CI, -                                      Context->getConstantInt(I.getType(), 1)), +                           ConstantExpr::getSub(NegOp0CI, +                                      ConstantInt::get(I.getType(), 1)),                                        Op0I->getOperand(0));            } else if (RHS->getValue().isSignBit()) {              // (X + C) ^ signbit -> (X + C + signbit) -            Constant *C = -                   Context->getConstantInt(RHS->getValue() + Op0CI->getValue()); +            Constant *C = ConstantInt::get(*Context, +                                           RHS->getValue() + Op0CI->getValue());              return BinaryOperator::CreateAdd(Op0I->getOperand(0), C);            }          } else if (Op0I->getOpcode() == Instruction::Or) {            // (X|C1)^C2 -> X^(C1|C2) iff X&~C1 == 0            if (MaskedValueIsZero(Op0I->getOperand(0), Op0CI->getValue())) { -            Constant *NewRHS = Context->getConstantExprOr(Op0CI, RHS); +            Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);              // Anything in both C1 and C2 is known to be zero, remove it from              // NewRHS. 
-            Constant *CommonBits = Context->getConstantExprAnd(Op0CI, RHS); -            NewRHS = Context->getConstantExprAnd(NewRHS,  -                                       Context->getConstantExprNot(CommonBits)); -            AddToWorkList(Op0I); +            Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS); +            NewRHS = ConstantExpr::getAnd(NewRHS,  +                                       ConstantExpr::getNot(CommonBits)); +            Worklist.Add(Op0I);              I.setOperand(0, Op0I->getOperand(0));              I.setOperand(1, NewRHS);              return &I; @@ -5127,13 +5164,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {          return NV;    } -  if (Value *X = dyn_castNotVal(Op0, Context))   // ~A ^ A == -1 +  if (Value *X = dyn_castNotVal(Op0))   // ~A ^ A == -1      if (X == Op1) -      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); +      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType())); -  if (Value *X = dyn_castNotVal(Op1, Context))   // A ^ ~A == -1 +  if (Value *X = dyn_castNotVal(Op1))   // A ^ ~A == -1      if (X == Op0) -      return ReplaceInstUsesWith(I, Context->getAllOnesValue(I.getType())); +      return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));    BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1); @@ -5152,7 +5189,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {        return ReplaceInstUsesWith(I, B);                      // A^(A^B) == B      } else if (match(Op1I, m_Xor(m_Value(A), m_Specific(Op0)))) {        return ReplaceInstUsesWith(I, A);                      // A^(B^A) == B -    } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) && Op1I->hasOneUse()){ +    } else if (match(Op1I, m_And(m_Value(A), m_Value(B))) &&  +               Op1I->hasOneUse()){        if (A == Op0) {                                      // A^(A&B) -> A^(B&A)          Op1I->swapOperands();          std::swap(A, B); @@ -5167,26 +5205,23 @@ 
Instruction *InstCombiner::visitXor(BinaryOperator &I) {    BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0);    if (Op0I) {      Value *A, *B; -    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && Op0I->hasOneUse()) { +    if (match(Op0I, m_Or(m_Value(A), m_Value(B))) && +        Op0I->hasOneUse()) {        if (A == Op1)                                  // (B|A)^B == (A|B)^B          std::swap(A, B); -      if (B == Op1) {                                // (A|B)^B == A & ~B -        Instruction *NotB = -          InsertNewInstBefore(BinaryOperator::CreateNot(Op1, "tmp"), I); -        return BinaryOperator::CreateAnd(A, NotB); -      } +      if (B == Op1)                                  // (A|B)^B == A & ~B +        return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1, "tmp"));      } else if (match(Op0I, m_Xor(m_Specific(Op1), m_Value(B)))) {        return ReplaceInstUsesWith(I, B);                      // (A^B)^A == B      } else if (match(Op0I, m_Xor(m_Value(A), m_Specific(Op1)))) {        return ReplaceInstUsesWith(I, A);                      // (B^A)^A == B -    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) && Op0I->hasOneUse()){ +    } else if (match(Op0I, m_And(m_Value(A), m_Value(B))) &&  +               Op0I->hasOneUse()){        if (A == Op1)                                        // (A&B)^A -> (B&A)^A          std::swap(A, B);        if (B == Op1 &&                                      // (B&A)^A == ~B & A            !isa<ConstantInt>(Op1)) {  // Canonical form is (B&C)^C -        Instruction *N = -          InsertNewInstBefore(BinaryOperator::CreateNot(A, "tmp"), I); -        return BinaryOperator::CreateAnd(N, Op1); +        return BinaryOperator::CreateAnd(Builder->CreateNot(A, "tmp"), Op1);        }      }    } @@ -5196,10 +5231,9 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {        Op0I->getOpcode() == Op1I->getOpcode() &&         Op0I->getOperand(1) == Op1I->getOperand(1) &&        (Op1I->hasOneUse() || 
Op1I->hasOneUse())) { -    Instruction *NewOp = -      InsertNewInstBefore(BinaryOperator::CreateXor(Op0I->getOperand(0), -                                                    Op1I->getOperand(0), -                                                    Op0I->getName()), I); +    Value *NewOp = +      Builder->CreateXor(Op0I->getOperand(0), Op1I->getOperand(0), +                         Op0I->getName());      return BinaryOperator::Create(Op1I->getOpcode(), NewOp,                                     Op1I->getOperand(1));    } @@ -5235,8 +5269,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {          X = B, Y = A, Z = C;        if (X) { -        Instruction *NewOp = -        InsertNewInstBefore(BinaryOperator::CreateXor(Y, Z, Op0->getName()), I); +        Value *NewOp = Builder->CreateXor(Y, Z, Op0->getName());          return BinaryOperator::CreateAnd(NewOp, X);        }      } @@ -5244,7 +5277,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {    // (icmp1 A, B) ^ (icmp2 A, B) --> (icmp3 A, B)    if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) -    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS),Context)) +    if (Instruction *R = AssociativeOpt(I, FoldICmpLogical(*this, RHS)))        return R;    // fold (xor (cast A), (cast B)) -> (cast (xor A, B)) @@ -5258,10 +5291,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {                                I.getType(), TD) &&              ValueRequiresCast(Op1C->getOpcode(), Op1C->getOperand(0),                                 I.getType(), TD)) { -          Instruction *NewOp = BinaryOperator::CreateXor(Op0C->getOperand(0), -                                                         Op1C->getOperand(0), -                                                         I.getName()); -          InsertNewInstBefore(NewOp, I); +          Value *NewOp = Builder->CreateXor(Op0C->getOperand(0), +                                            Op1C->getOperand(0), I.getName());          
  return CastInst::Create(Op0C->getOpcode(), NewOp, I.getType());          }        } @@ -5271,8 +5302,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {  }  static ConstantInt *ExtractElement(Constant *V, Constant *Idx, -                                   LLVMContext* Context) { -  return cast<ConstantInt>(Context->getConstantExprExtractElement(V, Idx)); +                                   LLVMContext *Context) { +  return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));  }  static bool HasAddOverflow(ConstantInt *Result, @@ -5290,13 +5321,13 @@ static bool HasAddOverflow(ConstantInt *Result,  /// AddWithOverflow - Compute Result = In1+In2, returning true if the result  /// overflowed for this type.  static bool AddWithOverflow(Constant *&Result, Constant *In1, -                            Constant *In2, LLVMContext* Context, +                            Constant *In2, LLVMContext *Context,                              bool IsSigned = false) { -  Result = Context->getConstantExprAdd(In1, In2); +  Result = ConstantExpr::getAdd(In1, In2);    if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { -      Constant *Idx = Context->getConstantInt(Type::Int32Ty, i); +      Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i);        if (HasAddOverflow(ExtractElement(Result, Idx, Context),                           ExtractElement(In1, Idx, Context),                           ExtractElement(In2, Idx, Context), @@ -5326,13 +5357,13 @@ static bool HasSubOverflow(ConstantInt *Result,  /// SubWithOverflow - Compute Result = In1-In2, returning true if the result  /// overflowed for this type.  
static bool SubWithOverflow(Constant *&Result, Constant *In1, -                            Constant *In2, LLVMContext* Context, +                            Constant *In2, LLVMContext *Context,                              bool IsSigned = false) { -  Result = Context->getConstantExprSub(In1, In2); +  Result = ConstantExpr::getSub(In1, In2);    if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { -      Constant *Idx = Context->getConstantInt(Type::Int32Ty, i); +      Constant *Idx = ConstantInt::get(Type::getInt32Ty(*Context), i);        if (HasSubOverflow(ExtractElement(Result, Idx, Context),                           ExtractElement(In1, Idx, Context),                           ExtractElement(In2, Idx, Context), @@ -5351,11 +5382,10 @@ static bool SubWithOverflow(Constant *&Result, Constant *In1,  /// code necessary to compute the offset from the base pointer (without adding  /// in the base pointer).  Return the result as a signed integer of intptr size.  static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) { -  TargetData &TD = IC.getTargetData(); +  TargetData &TD = *IC.getTargetData();    gep_type_iterator GTI = gep_type_begin(GEP); -  const Type *IntPtrTy = TD.getIntPtrType(); -  LLVMContext* Context = IC.getContext(); -  Value *Result = Context->getNullValue(IntPtrTy); +  const Type *IntPtrTy = TD.getIntPtrType(I.getContext()); +  Value *Result = Constant::getNullValue(IntPtrTy);    // Build a mask for high order bits.    
unsigned IntPtrWidth = TD.getPointerSizeInBits(); @@ -5372,74 +5402,49 @@ static Value *EmitGEPOffset(User *GEP, Instruction &I, InstCombiner &IC) {        if (const StructType *STy = dyn_cast<StructType>(*GTI)) {          Size = TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); -        if (ConstantInt *RC = dyn_cast<ConstantInt>(Result)) -          Result =  -             Context->getConstantInt(RC->getValue() + APInt(IntPtrWidth, Size)); -        else -          Result = IC.InsertNewInstBefore( -                   BinaryOperator::CreateAdd(Result, -                                        Context->getConstantInt(IntPtrTy, Size), -                                             GEP->getName()+".offs"), I); +        Result = IC.Builder->CreateAdd(Result, +                                       ConstantInt::get(IntPtrTy, Size), +                                       GEP->getName()+".offs");          continue;        } -      Constant *Scale = Context->getConstantInt(IntPtrTy, Size); +      Constant *Scale = ConstantInt::get(IntPtrTy, Size);        Constant *OC = -              Context->getConstantExprIntegerCast(OpC, IntPtrTy, true /*SExt*/); -      Scale = Context->getConstantExprMul(OC, Scale); -      if (Constant *RC = dyn_cast<Constant>(Result)) -        Result = Context->getConstantExprAdd(RC, Scale); -      else { -        // Emit an add instruction. -        Result = IC.InsertNewInstBefore( -           BinaryOperator::CreateAdd(Result, Scale, -                                     GEP->getName()+".offs"), I); -      } +              ConstantExpr::getIntegerCast(OpC, IntPtrTy, true /*SExt*/); +      Scale = ConstantExpr::getMul(OC, Scale); +      // Emit an add instruction. +      Result = IC.Builder->CreateAdd(Result, Scale, GEP->getName()+".offs");        continue;      }      // Convert to correct type. 
-    if (Op->getType() != IntPtrTy) { -      if (Constant *OpC = dyn_cast<Constant>(Op)) -        Op = Context->getConstantExprIntegerCast(OpC, IntPtrTy, true); -      else -        Op = IC.InsertNewInstBefore(CastInst::CreateIntegerCast(Op, IntPtrTy, -                                                                true, -                                                      Op->getName()+".c"), I); -    } +    if (Op->getType() != IntPtrTy) +      Op = IC.Builder->CreateIntCast(Op, IntPtrTy, true, Op->getName()+".c");      if (Size != 1) { -      Constant *Scale = Context->getConstantInt(IntPtrTy, Size); -      if (Constant *OpC = dyn_cast<Constant>(Op)) -        Op = Context->getConstantExprMul(OpC, Scale); -      else    // We'll let instcombine(mul) convert this to a shl if possible. -        Op = IC.InsertNewInstBefore(BinaryOperator::CreateMul(Op, Scale, -                                                  GEP->getName()+".idx"), I); +      Constant *Scale = ConstantInt::get(IntPtrTy, Size); +      // We'll let instcombine(mul) convert this to a shl if possible. +      Op = IC.Builder->CreateMul(Op, Scale, GEP->getName()+".idx");      }      // Emit an add instruction. -    if (isa<Constant>(Op) && isa<Constant>(Result)) -      Result = Context->getConstantExprAdd(cast<Constant>(Op), -                                    cast<Constant>(Result)); -    else -      Result = IC.InsertNewInstBefore(BinaryOperator::CreateAdd(Op, Result, -                                                  GEP->getName()+".offs"), I); +    Result = IC.Builder->CreateAdd(Op, Result, GEP->getName()+".offs");    }    return Result;  } -/// EvaluateGEPOffsetExpression - Return an value that can be used to compare of -/// the *offset* implied by GEP to zero.  For example, if we have &A[i], we want -/// to return 'i' for "icmp ne i, 0".  Note that, in general, indices can be -/// complex, and scales are involved.  
The above expression would also be legal -/// to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32).  This -/// later form is less amenable to optimization though, and we are allowed to -/// generate the first by knowing that pointer arithmetic doesn't overflow. +/// EvaluateGEPOffsetExpression - Return a value that can be used to compare +/// the *offset* implied by a GEP to zero.  For example, if we have &A[i], we +/// want to return 'i' for "icmp ne i, 0".  Note that, in general, indices can +/// be complex, and scales are involved.  The above expression would also be +/// legal to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). +/// This later form is less amenable to optimization though, and we are allowed +/// to generate the first by knowing that pointer arithmetic doesn't overflow.  ///  /// If we can't emit an optimized form for this expression, this returns null.  ///   static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,                                            InstCombiner &IC) { -  TargetData &TD = IC.getTargetData(); +  TargetData &TD = *IC.getTargetData();    gep_type_iterator GTI = gep_type_begin(GEP);    // Check to see if this gep only has a single variable index.  If so, and if @@ -5502,8 +5507,9 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,      // we don't need to bother extending: the extension won't affect where the      // computation crosses zero.      
if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) -      VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(), -                                  VariableIdx->getNameStart(), &I); +      VariableIdx = new TruncInst(VariableIdx,  +                                  TD.getIntPtrType(VariableIdx->getContext()), +                                  VariableIdx->getName(), &I);      return VariableIdx;    } @@ -5523,40 +5529,39 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,      return 0;    // Okay, we can do this evaluation.  Start by converting the index to intptr. -  const Type *IntPtrTy = TD.getIntPtrType(); +  const Type *IntPtrTy = TD.getIntPtrType(VariableIdx->getContext());    if (VariableIdx->getType() != IntPtrTy)      VariableIdx = CastInst::CreateIntegerCast(VariableIdx, IntPtrTy,                                                true /*SExt*/,  -                                              VariableIdx->getNameStart(), &I); -  Constant *OffsetVal = IC.getContext()->getConstantInt(IntPtrTy, NewOffs); +                                              VariableIdx->getName(), &I); +  Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);    return BinaryOperator::CreateAdd(VariableIdx, OffsetVal, "offset", &I);  }  /// FoldGEPICmp - Fold comparisons between a GEP instruction and something  /// else.  At this point we know that the GEP is on the LHS of the comparison. -Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS, +Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,                                         ICmpInst::Predicate Cond,                                         Instruction &I) { -  assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!"); -    // Look through bitcasts.    
if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))      RHS = BCI->getOperand(0);    Value *PtrBase = GEPLHS->getOperand(0); -  if (PtrBase == RHS) { +  if (TD && PtrBase == RHS && GEPLHS->isInBounds()) {      // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).      // This transformation (ignoring the base and scales) is valid because we -    // know pointers can't overflow.  See if we can output an optimized form. +    // know pointers can't overflow since the gep is inbounds.  See if we can +    // output an optimized form.      Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);      // If not, synthesize the offset the hard way.      if (Offset == 0)        Offset = EmitGEPOffset(GEPLHS, I, *this);      return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, -                        Context->getNullValue(Offset->getType())); -  } else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) { +                        Constant::getNullValue(Offset->getType())); +  } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {      // If the base pointers are different, but the indices are the same, just      // compare the base pointer.      if (PtrBase != GEPRHS->getOperand(0)) { @@ -5572,7 +5577,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,        // If all indices are the same, just compare the base pointers.        if (IndicesTheSame) -        return new ICmpInst(ICmpInst::getSignedPredicate(Cond),  +        return new ICmpInst(ICmpInst::getSignedPredicate(Cond),                              GEPLHS->getOperand(0), GEPRHS->getOperand(0));        // Otherwise, the base pointers are different and the indices are @@ -5622,7 +5627,7 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,        if (NumDifferences == 0)   // SAME GEP?          return ReplaceInstUsesWith(I, // No comparison is needed here. 
-                                   Context->getConstantInt(Type::Int1Ty, +                                   ConstantInt::get(Type::getInt1Ty(*Context),                                               ICmpInst::isTrueWhenEqual(Cond)));        else if (NumDifferences == 1) { @@ -5635,7 +5640,8 @@ Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,      // Only lower this if the icmp is the only user of the GEP or if we expect      // the result to fold to a constant! -    if ((isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) && +    if (TD && +        (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&          (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {        // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)        Value *L = EmitGEPOffset(GEPLHS, I, *this); @@ -5680,7 +5686,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,    ICmpInst::Predicate Pred;    switch (I.getPredicate()) { -  default: assert(0 && "Unexpected predicate!"); +  default: llvm_unreachable("Unexpected predicate!");    case FCmpInst::FCMP_UEQ:    case FCmpInst::FCMP_OEQ:      Pred = ICmpInst::ICMP_EQ; @@ -5706,9 +5712,9 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,      Pred = ICmpInst::ICMP_NE;      break;    case FCmpInst::FCMP_ORD: -    return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +    return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));    case FCmpInst::FCMP_UNO: -    return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +    return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));    }    const IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType()); @@ -5728,8 +5734,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,      if (SMax.compare(RHS) == APFloat::cmpLessThan) {  // smax < 13123.0        if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_SLT ||            Pred == ICmpInst::ICMP_SLE) -        return ReplaceInstUsesWith(I, 
Context->getConstantIntTrue()); -      return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); +      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));      }    } else {      // If the RHS value is > UnsignedMax, fold the comparison. This handles @@ -5740,8 +5746,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,      if (UMax.compare(RHS) == APFloat::cmpLessThan) {  // umax < 13123.0        if (Pred == ICmpInst::ICMP_NE  || Pred == ICmpInst::ICMP_ULT ||            Pred == ICmpInst::ICMP_ULE) -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); -      return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); +      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));      }    } @@ -5753,8 +5759,8 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,      if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0        if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT ||            Pred == ICmpInst::ICMP_SGE) -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); -      return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); +      return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));      }    } @@ -5763,27 +5769,27 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,    // casting the FP value to the integer value and back, checking for equality.    // Don't do this for zero, because -0.0 is not fractional.    Constant *RHSInt = LHSUnsigned -    ? Context->getConstantExprFPToUI(RHSC, IntTy) -    : Context->getConstantExprFPToSI(RHSC, IntTy); +    ? ConstantExpr::getFPToUI(RHSC, IntTy) +    : ConstantExpr::getFPToSI(RHSC, IntTy);    if (!RHS.isZero()) {      bool Equal = LHSUnsigned -      ? 
Context->getConstantExprUIToFP(RHSInt, RHSC->getType()) == RHSC -      : Context->getConstantExprSIToFP(RHSInt, RHSC->getType()) == RHSC; +      ? ConstantExpr::getUIToFP(RHSInt, RHSC->getType()) == RHSC +      : ConstantExpr::getSIToFP(RHSInt, RHSC->getType()) == RHSC;      if (!Equal) {        // If we had a comparison against a fractional value, we have to adjust        // the compare predicate and sometimes the value.  RHSC is rounded towards        // zero at this point.        switch (Pred) { -      default: assert(0 && "Unexpected integer comparison!"); +      default: llvm_unreachable("Unexpected integer comparison!");        case ICmpInst::ICMP_NE:  // (float)int != 4.4   --> true -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        case ICmpInst::ICMP_EQ:  // (float)int == 4.4   --> false -        return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));        case ICmpInst::ICMP_ULE:          // (float)int <= 4.4   --> int <= 4          // (float)int <= -4.4  --> false          if (RHS.isNegative()) -          return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));          break;        case ICmpInst::ICMP_SLE:          // (float)int <= 4.4   --> int <= 4 @@ -5795,7 +5801,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,          // (float)int < -4.4   --> false          // (float)int < 4.4    --> int <= 4          if (RHS.isNegative()) -          return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));          Pred = ICmpInst::ICMP_ULE;          break;        case ICmpInst::ICMP_SLT: @@ -5808,7 +5814,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,          // (float)int > 4.4    --> 
int > 4          // (float)int > -4.4   --> true          if (RHS.isNegative()) -          return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +          return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));          break;        case ICmpInst::ICMP_SGT:          // (float)int > 4.4    --> int > 4 @@ -5820,7 +5826,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,          // (float)int >= -4.4   --> true          // (float)int >= 4.4    --> int > 4          if (!RHS.isNegative()) -          return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +          return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));          Pred = ICmpInst::ICMP_UGT;          break;        case ICmpInst::ICMP_SGE: @@ -5844,22 +5850,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {    // Fold trivial predicates.    if (I.getPredicate() == FCmpInst::FCMP_FALSE) -    return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +    return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));    if (I.getPredicate() == FCmpInst::FCMP_TRUE) -    return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +    return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));    // Simplify 'fcmp pred X, X'    if (Op0 == Op1) {      switch (I.getPredicate()) { -    default: assert(0 && "Unknown predicate!"); +    default: llvm_unreachable("Unknown predicate!");      case FCmpInst::FCMP_UEQ:    // True if unordered or equal      case FCmpInst::FCMP_UGE:    // True if unordered, greater than, or equal      case FCmpInst::FCMP_ULE:    // True if unordered, less than, or equal -      return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +      return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));      case FCmpInst::FCMP_OGT:    // True if ordered and greater than      case FCmpInst::FCMP_OLT:    // True if ordered and less than      case FCmpInst::FCMP_ONE:    // True if ordered and operands are unequal -      
return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +      return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));      case FCmpInst::FCMP_UNO:    // True if unordered: isnan(X) | isnan(Y)      case FCmpInst::FCMP_ULT:    // True if unordered or less than @@ -5867,7 +5873,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {      case FCmpInst::FCMP_UNE:    // True if unordered or not equal        // Canonicalize these to be 'fcmp uno %X, 0.0'.        I.setPredicate(FCmpInst::FCMP_UNO); -      I.setOperand(1, Context->getNullValue(Op0->getType())); +      I.setOperand(1, Constant::getNullValue(Op0->getType()));        return &I;      case FCmpInst::FCMP_ORD:    // True if ordered (no nans) @@ -5876,13 +5882,13 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {      case FCmpInst::FCMP_OLE:    // True if ordered and less than or equal        // Canonicalize these to be 'fcmp ord %X, 0.0'.        I.setPredicate(FCmpInst::FCMP_ORD); -      I.setOperand(1, Context->getNullValue(Op0->getType())); +      I.setOperand(1, Constant::getNullValue(Op0->getType()));        return &I;      }    }    if (isa<UndefValue>(Op1))                  // fcmp pred X, undef -> undef -    return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty)); +    return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));    // Handle fcmp with constant RHS    if (Constant *RHSC = dyn_cast<Constant>(Op1)) { @@ -5890,11 +5896,11 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {      if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {        if (CFP->getValueAPF().isNaN()) {          if (FCmpInst::isOrdered(I.getPredicate()))   // True if ordered and... 
-          return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +          return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));          assert(FCmpInst::isUnordered(I.getPredicate()) &&                 "Comparison must be either ordered or unordered!");          // True if unordered. -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        }      } @@ -5905,7 +5911,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {          // block.  If in the same block, we're encouraging jump threading.  If          // not, we are just pessimizing the code by making an i1 phi.          if (LHSI->getParent() == I.getParent()) -          if (Instruction *NV = FoldOpIntoPhi(I)) +          if (Instruction *NV = FoldOpIntoPhi(I, true))              return NV;          break;        case Instruction::SIToFP: @@ -5921,18 +5927,16 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {          if (LHSI->hasOneUse()) {            if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {              // Fold the known value into the constant operand. -            Op1 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC); +            Op1 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);              // Insert a new FCmp of the other select operand. -            Op2 = InsertNewInstBefore(new FCmpInst(I.getPredicate(), -                                                      LHSI->getOperand(2), RHSC, -                                                      I.getName()), I); +            Op2 = Builder->CreateFCmp(I.getPredicate(), +                                      LHSI->getOperand(2), RHSC, I.getName());            } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {              // Fold the known value into the constant operand. 
-            Op2 = Context->getConstantExprCompare(I.getPredicate(), C, RHSC); +            Op2 = ConstantExpr::getCompare(I.getPredicate(), C, RHSC);              // Insert a new FCmp of the other select operand. -            Op1 = InsertNewInstBefore(new FCmpInst(I.getPredicate(), -                                                      LHSI->getOperand(1), RHSC, -                                                      I.getName()), I); +            Op1 = Builder->CreateFCmp(I.getPredicate(), LHSI->getOperand(1), +                                      RHSC, I.getName());            }          } @@ -5952,28 +5956,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {    // icmp X, X    if (Op0 == Op1) -    return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty,  +    return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(),                                                     I.isTrueWhenEqual()));    if (isa<UndefValue>(Op1))                  // X icmp undef -> undef -    return ReplaceInstUsesWith(I, Context->getUndef(Type::Int1Ty)); +    return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));    // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value    // addresses never equal each other!  We already know that Op0 != Op1. 
-  if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) || +  if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||          isa<ConstantPointerNull>(Op0)) && -      (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) || +      (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||          isa<ConstantPointerNull>(Op1))) -    return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty,  +    return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context),                                                      !I.isTrueWhenEqual()));    // icmp's with boolean values can always be turned into bitwise operations -  if (Ty == Type::Int1Ty) { +  if (Ty == Type::getInt1Ty(*Context)) {      switch (I.getPredicate()) { -    default: assert(0 && "Invalid icmp instruction!"); +    default: llvm_unreachable("Invalid icmp instruction!");      case ICmpInst::ICMP_EQ: {               // icmp eq i1 A, B -> ~(A^B) -      Instruction *Xor = BinaryOperator::CreateXor(Op0, Op1, I.getName()+"tmp"); -      InsertNewInstBefore(Xor, I); +      Value *Xor = Builder->CreateXor(Op0, Op1, I.getName()+"tmp");        return BinaryOperator::CreateNot(Xor);      }      case ICmpInst::ICMP_NE:                  // icmp eq i1 A, B -> A^B @@ -5983,32 +5986,28 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {        std::swap(Op0, Op1);                   // Change icmp ugt -> icmp ult        // FALL THROUGH      case ICmpInst::ICMP_ULT:{               // icmp ult i1 A, B -> ~A & B -      Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp"); -      InsertNewInstBefore(Not, I); +      Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");        return BinaryOperator::CreateAnd(Not, Op1);      }      case ICmpInst::ICMP_SGT:        std::swap(Op0, Op1);                   // Change icmp sgt -> icmp slt        // FALL THROUGH      case ICmpInst::ICMP_SLT: {               // icmp slt i1 A, B -> A & ~B -      Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp"); 
-      InsertNewInstBefore(Not, I); +      Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");        return BinaryOperator::CreateAnd(Not, Op0);      }      case ICmpInst::ICMP_UGE:        std::swap(Op0, Op1);                   // Change icmp uge -> icmp ule        // FALL THROUGH      case ICmpInst::ICMP_ULE: {               //  icmp ule i1 A, B -> ~A | B -      Instruction *Not = BinaryOperator::CreateNot(Op0, I.getName()+"tmp"); -      InsertNewInstBefore(Not, I); +      Value *Not = Builder->CreateNot(Op0, I.getName()+"tmp");        return BinaryOperator::CreateOr(Not, Op1);      }      case ICmpInst::ICMP_SGE:        std::swap(Op0, Op1);                   // Change icmp sge -> icmp sle        // FALL THROUGH      case ICmpInst::ICMP_SLE: {               //  icmp sle i1 A, B -> A | ~B -      Instruction *Not = BinaryOperator::CreateNot(Op1, I.getName()+"tmp"); -      InsertNewInstBefore(Not, I); +      Value *Not = Builder->CreateNot(Op1, I.getName()+"tmp");        return BinaryOperator::CreateOr(Not, Op0);      }      } @@ -6040,20 +6039,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {      default: break;      case ICmpInst::ICMP_ULE:        if (CI->isMaxValue(false))                 // A <=u MAX -> TRUE -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); -      return new ICmpInst(ICmpInst::ICMP_ULT, Op0, AddOne(CI, Context)); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); +      return new ICmpInst(ICmpInst::ICMP_ULT, Op0, +                          AddOne(CI));      case ICmpInst::ICMP_SLE:        if (CI->isMaxValue(true))                  // A <=s MAX -> TRUE -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); -      return new ICmpInst(ICmpInst::ICMP_SLT, Op0, AddOne(CI, Context)); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); +      return new ICmpInst(ICmpInst::ICMP_SLT, Op0, +                          AddOne(CI));      case ICmpInst::ICMP_UGE:   
     if (CI->isMinValue(false))                 // A >=u MIN -> TRUE -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); -      return new ICmpInst( ICmpInst::ICMP_UGT, Op0, SubOne(CI, Context)); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); +      return new ICmpInst(ICmpInst::ICMP_UGT, Op0, +                          SubOne(CI));      case ICmpInst::ICMP_SGE:        if (CI->isMinValue(true))                  // A >=s MIN -> TRUE -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); -      return new ICmpInst(ICmpInst::ICMP_SGT, Op0, SubOne(CI, Context)); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context)); +      return new ICmpInst(ICmpInst::ICMP_SGT, Op0, +                          SubOne(CI));      }      // If this comparison is a normal comparison, it demands all @@ -6100,110 +6103,114 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {      // that code below can assume that Min != Max.      if (!isa<Constant>(Op0) && Op0Min == Op0Max)        return new ICmpInst(I.getPredicate(), -                          Context->getConstantInt(Op0Min), Op1); +                          ConstantInt::get(*Context, Op0Min), Op1);      if (!isa<Constant>(Op1) && Op1Min == Op1Max) -      return new ICmpInst(I.getPredicate(), Op0,  -                          Context->getConstantInt(Op1Min)); +      return new ICmpInst(I.getPredicate(), Op0, +                          ConstantInt::get(*Context, Op1Min));      // Based on the range information we know about the LHS, see if we can      // simplify this comparison.  For example, (x&4) < 8  is always true.      
switch (I.getPredicate()) { -    default: assert(0 && "Unknown icmp opcode!"); +    default: llvm_unreachable("Unknown icmp opcode!");      case ICmpInst::ICMP_EQ:        if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) -        return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));        break;      case ICmpInst::ICMP_NE:        if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        break;      case ICmpInst::ICMP_ULT:        if (Op0Max.ult(Op1Min))          // A <u B -> true if max(A) < min(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        if (Op0Min.uge(Op1Max))          // A <u B -> false if min(A) >= max(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));        if (Op1Min == Op0Max)            // A <u B -> A != B if max(A) == min(B)          return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {          if (Op1Max == Op0Min+1)        // A <u C -> A == C-1 if min(A)+1 == C -          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context)); +          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, +                              SubOne(CI));          // (x <u 2147483648) -> (x >s -1)  -> true if sign bit clear          if (CI->isMinValue(true))            return new ICmpInst(ICmpInst::ICMP_SGT, Op0, -                           Context->getConstantIntAllOnesValue(Op0->getType())); +                           Constant::getAllOnesValue(Op0->getType()));        }        break;      case ICmpInst::ICMP_UGT:        if (Op0Min.ugt(Op1Max))          // A >u B -> true if min(A) > max(B) -        
return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        if (Op0Max.ule(Op1Min))          // A >u B -> false if max(A) <= max(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));        if (Op1Max == Op0Min)            // A >u B -> A != B if min(A) == max(B)          return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {          if (Op1Min == Op0Max-1)        // A >u C -> A == C+1 if max(a)-1 == C -          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context)); +          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, +                              AddOne(CI));          // (x >u 2147483647) -> (x <s 0)  -> true if sign bit set          if (CI->isMaxValue(true))            return new ICmpInst(ICmpInst::ICMP_SLT, Op0, -                              Context->getNullValue(Op0->getType())); +                              Constant::getNullValue(Op0->getType()));        }        break;      case ICmpInst::ICMP_SLT:        if (Op0Max.slt(Op1Min))          // A <s B -> true if max(A) < min(C) -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        if (Op0Min.sge(Op1Max))          // A <s B -> false if min(A) >= max(C) -        return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));        if (Op1Min == Op0Max)            // A <s B -> A != B if max(A) == min(B)          return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {          if (Op1Max == Op0Min+1)        // A <s C -> A == C-1 if min(A)+1 == C -          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, SubOne(CI, Context)); +          return new 
ICmpInst(ICmpInst::ICMP_EQ, Op0, +                              SubOne(CI));        }        break;      case ICmpInst::ICMP_SGT:        if (Op0Min.sgt(Op1Max))          // A >s B -> true if min(A) > max(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        if (Op0Max.sle(Op1Min))          // A >s B -> false if max(A) <= min(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));        if (Op1Max == Op0Min)            // A >s B -> A != B if min(A) == max(B)          return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {          if (Op1Min == Op0Max-1)        // A >s C -> A == C+1 if max(A)-1 == C -          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, AddOne(CI, Context)); +          return new ICmpInst(ICmpInst::ICMP_EQ, Op0, +                              AddOne(CI));        }        break;      case ICmpInst::ICMP_SGE:        assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");        if (Op0Min.sge(Op1Max))          // A >=s B -> true if min(A) >= max(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        if (Op0Max.slt(Op1Min))          // A >=s B -> false if max(A) < min(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));        break;      case ICmpInst::ICMP_SLE:        assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");        if (Op0Max.sle(Op1Min))          // A <=s B -> true if max(A) <= min(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        if (Op0Min.sgt(Op1Max))          // 
A <=s B -> false if min(A) > max(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));        break;      case ICmpInst::ICMP_UGE:        assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");        if (Op0Min.uge(Op1Max))          // A >=u B -> true if min(A) >= max(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        if (Op0Max.ult(Op1Min))          // A >=u B -> false if max(A) < min(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));        break;      case ICmpInst::ICMP_ULE:        assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");        if (Op0Max.ule(Op1Min))          // A <=u B -> true if max(A) <= min(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntTrue()); +        return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));        if (Op0Min.ugt(Op1Max))          // A <=u B -> false if min(A) > max(B) -        return ReplaceInstUsesWith(I, Context->getConstantIntFalse()); +        return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));        break;      } @@ -6255,16 +6262,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {              }            if (isAllZeros)              return new ICmpInst(I.getPredicate(), LHSI->getOperand(0), -                    Context->getNullValue(LHSI->getOperand(0)->getType())); +                    Constant::getNullValue(LHSI->getOperand(0)->getType()));          }          break;        case Instruction::PHI: -        // Only fold icmp into the PHI if the phi and fcmp are in the same +        // Only fold icmp into the PHI if the phi and icmp are in the same          // block.  If in the same block, we're encouraging jump threading.  
If          // not, we are just pessimizing the code by making an i1 phi.          if (LHSI->getParent() == I.getParent()) -          if (Instruction *NV = FoldOpIntoPhi(I)) +          if (Instruction *NV = FoldOpIntoPhi(I, true))              return NV;          break;        case Instruction::Select: { @@ -6275,18 +6282,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {          if (LHSI->hasOneUse()) {            if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {              // Fold the known value into the constant operand. -            Op1 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC); +            Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);              // Insert a new ICmp of the other select operand. -            Op2 = InsertNewInstBefore(new ICmpInst(I.getPredicate(), -                                                   LHSI->getOperand(2), RHSC, -                                                   I.getName()), I); +            Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), +                                      RHSC, I.getName());            } else if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {              // Fold the known value into the constant operand. -            Op2 = Context->getConstantExprICmp(I.getPredicate(), C, RHSC); +            Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);              // Insert a new ICmp of the other select operand. 
-            Op1 = InsertNewInstBefore(new ICmpInst(I.getPredicate(), -                                                   LHSI->getOperand(1), RHSC, -                                                   I.getName()), I); +            Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), +                                      RHSC, I.getName());            }          } @@ -6298,19 +6303,31 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {          // If we have (malloc != null), and if the malloc has a single use, we          // can assume it is successful and remove the malloc.          if (LHSI->hasOneUse() && isa<ConstantPointerNull>(RHSC)) { -          AddToWorkList(LHSI); -          return ReplaceInstUsesWith(I, Context->getConstantInt(Type::Int1Ty, -                                                         !I.isTrueWhenEqual())); +          Worklist.Add(LHSI); +          return ReplaceInstUsesWith(I, +                                     ConstantInt::get(Type::getInt1Ty(*Context), +                                                      !I.isTrueWhenEqual())); +        } +        break; +      case Instruction::Call: +        // If we have (malloc != null), and if the malloc has a single use, we +        // can assume it is successful and remove the malloc. +        if (isMalloc(LHSI) && LHSI->hasOneUse() && +            isa<ConstantPointerNull>(RHSC)) { +          Worklist.Add(LHSI); +          return ReplaceInstUsesWith(I, +                                     ConstantInt::get(Type::getInt1Ty(*Context), +                                                      !I.isTrueWhenEqual()));          }          break;        }    }    // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now. 
-  if (User *GEP = dyn_castGetElementPtr(Op0)) +  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))      if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))        return NI; -  if (User *GEP = dyn_castGetElementPtr(Op1)) +  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))      if (Instruction *NI = FoldGEPICmp(GEP, Op0,                             ICmpInst::getSwappedPredicate(I.getPredicate()), I))        return NI; @@ -6333,10 +6350,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {        // If Op1 is a constant, we can fold the cast into the constant.        if (Op0->getType() != Op1->getType()) {          if (Constant *Op1C = dyn_cast<Constant>(Op1)) { -          Op1 = Context->getConstantExprBitCast(Op1C, Op0->getType()); +          Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());          } else {            // Otherwise, cast the RHS right before the icmp -          Op1 = InsertBitCastBefore(Op1, Op0->getType(), I); +          Op1 = Builder->CreateBitCast(Op1, Op0->getType());          }        }        return new ICmpInst(I.getPredicate(), Op0, Op1); @@ -6397,16 +6414,12 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {              // Mask = -1 >> count-trailing-zeros(Cst).              
if (!CI->isZero() && !CI->isOne()) {                const APInt &AP = CI->getValue(); -              ConstantInt *Mask = Context->getConstantInt( +              ConstantInt *Mask = ConstantInt::get(*Context,                                         APInt::getLowBitsSet(AP.getBitWidth(),                                                             AP.getBitWidth() -                                                        AP.countTrailingZeros())); -              Instruction *And1 = BinaryOperator::CreateAnd(Op0I->getOperand(0), -                                                            Mask); -              Instruction *And2 = BinaryOperator::CreateAnd(Op1I->getOperand(0), -                                                            Mask); -              InsertNewInstBefore(And1, I); -              InsertNewInstBefore(And2, I); +              Value *And1 = Builder->CreateAnd(Op0I->getOperand(0), Mask); +              Value *And2 = Builder->CreateAnd(Op1I->getOperand(0), Mask);                return new ICmpInst(I.getPredicate(), And1, And2);              }            } @@ -6435,7 +6448,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {        if (A == Op1 || B == Op1) {    // (A^B) == A  ->  B == 0          Value *OtherVal = A == Op1 ? 
B : A;          return new ICmpInst(I.getPredicate(), OtherVal, -                            Context->getNullValue(A->getType())); +                            Constant::getNullValue(A->getType()));        }        if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) { @@ -6444,10 +6457,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {          if (match(B, m_ConstantInt(C1)) &&              match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {            Constant *NC =  -                       Context->getConstantInt(C1->getValue() ^ C2->getValue()); -          Instruction *Xor = BinaryOperator::CreateXor(C, NC, "tmp"); -          return new ICmpInst(I.getPredicate(), A, -                              InsertNewInstBefore(Xor, I)); +                   ConstantInt::get(*Context, C1->getValue() ^ C2->getValue()); +          Value *Xor = Builder->CreateXor(C, NC, "tmp"); +          return new ICmpInst(I.getPredicate(), A, Xor);          }          // A^B == A^D -> B == D @@ -6463,18 +6475,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {        // A == (A^B)  ->  B == 0        Value *OtherVal = A == Op0 ? 
B : A;        return new ICmpInst(I.getPredicate(), OtherVal, -                          Context->getNullValue(A->getType())); +                          Constant::getNullValue(A->getType()));      }      // (A-B) == A  ->  B == 0      if (match(Op0, m_Sub(m_Specific(Op1), m_Value(B))))        return new ICmpInst(I.getPredicate(), B,  -                          Context->getNullValue(B->getType())); +                          Constant::getNullValue(B->getType()));      // A == (A-B)  ->  B == 0      if (match(Op1, m_Sub(m_Specific(Op0), m_Value(B))))        return new ICmpInst(I.getPredicate(), B, -                          Context->getNullValue(B->getType())); +                          Constant::getNullValue(B->getType()));      // (X&Z) == (Y&Z) -> (X^Y) & Z == 0      if (Op0->hasOneUse() && Op1->hasOneUse() && @@ -6493,10 +6505,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {        }        if (X) {   // Build (X^Y) & Z -        Op1 = InsertNewInstBefore(BinaryOperator::CreateXor(X, Y, "tmp"), I); -        Op1 = InsertNewInstBefore(BinaryOperator::CreateAnd(Op1, Z, "tmp"), I); +        Op1 = Builder->CreateXor(X, Y, "tmp"); +        Op1 = Builder->CreateAnd(Op1, Z, "tmp");          I.setOperand(0, Op1); -        I.setOperand(1, Context->getNullValue(Op1->getType())); +        I.setOperand(1, Constant::getNullValue(Op1->getType()));          return &I;        }      } @@ -6535,13 +6547,13 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,    // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and     // C2 (CI). By solving for X we can turn this into a range check     // instead of computing a divide.  -  Constant *Prod = Context->getConstantExprMul(CmpRHS, DivRHS); +  Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);    // Determine if the product overflows by seeing if the product is    // not equal to the divide. 
Make sure we do the same kind of divide    // as in the LHS instruction that we're folding.  -  bool ProdOV = (DivIsSigned ? Context->getConstantExprSDiv(Prod, DivRHS) : -                 Context->getConstantExprUDiv(Prod, DivRHS)) != CmpRHS; +  bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) : +                 ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;    // Get the ICmp opcode    ICmpInst::Predicate Pred = ICI.getPredicate(); @@ -6565,8 +6577,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,    } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.      if (CmpRHSV == 0) {       // (X / pos) op 0        // Can't overflow.  e.g.  X/2 op 0 --> [-1, 2) -      LoBound = cast<ConstantInt>(Context->getConstantExprNeg(SubOne(DivRHS,  -                                                                    Context))); +      LoBound = cast<ConstantInt>(ConstantExpr::getNeg(SubOne(DivRHS)));        HiBound = DivRHS;      } else if (CmpRHSV.isStrictlyPositive()) {   // (X / pos) op pos        LoBound = Prod;     // e.g.   X/5 op 3 --> [15, 20) @@ -6575,11 +6586,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,          HiOverflow = AddWithOverflow(HiBound, Prod, DivRHS, Context, true);      } else {                       // (X / pos) op neg        // e.g. X/5 op -3  --> [-15-4, -15+1) --> [-19, -14) -      HiBound = AddOne(Prod, Context); +      HiBound = AddOne(Prod);        LoOverflow = HiOverflow = ProdOV ? -1 : 0;        if (!LoOverflow) {          ConstantInt* DivNeg = -                         cast<ConstantInt>(Context->getConstantExprNeg(DivRHS)); +                         cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));          LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, Context,                                       true) ? 
-1 : 0;         } @@ -6587,15 +6598,15 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,    } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.      if (CmpRHSV == 0) {       // (X / neg) op 0        // e.g. X/-5 op 0  --> [-4, 5) -      LoBound = AddOne(DivRHS, Context); -      HiBound = cast<ConstantInt>(Context->getConstantExprNeg(DivRHS)); +      LoBound = AddOne(DivRHS); +      HiBound = cast<ConstantInt>(ConstantExpr::getNeg(DivRHS));        if (HiBound == DivRHS) {     // -INTMIN = INTMIN          HiOverflow = 1;            // [INTMIN+1, overflow)          HiBound = 0;               // e.g. X/INTMIN = 0 --> X > INTMIN        }      } else if (CmpRHSV.isStrictlyPositive()) {   // (X / neg) op pos        // e.g. X/-5 op 3  --> [-19, -14) -      HiBound = AddOne(Prod, Context); +      HiBound = AddOne(Prod);        HiOverflow = LoOverflow = ProdOV ? -1 : 0;        if (!LoOverflow)          LoOverflow = AddWithOverflow(LoBound, HiBound, @@ -6613,42 +6624,42 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,    Value *X = DivI->getOperand(0);    switch (Pred) { -  default: assert(0 && "Unhandled icmp opcode!"); +  default: llvm_unreachable("Unhandled icmp opcode!");    case ICmpInst::ICMP_EQ:      if (LoOverflow && HiOverflow) -      return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); +      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));      else if (HiOverflow) -      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :  +      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :                            ICmpInst::ICMP_UGE, X, LoBound);      else if (LoOverflow) -      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :  +      return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SLT :                            ICmpInst::ICMP_ULT, X, HiBound);      else        return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, true, ICI);    case ICmpInst::ICMP_NE:      if (LoOverflow && HiOverflow) -      return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); +      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));      else if (HiOverflow) -      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :  +      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :                            ICmpInst::ICMP_ULT, X, LoBound);      else if (LoOverflow) -      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :  +      return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :                            ICmpInst::ICMP_UGE, X, HiBound);      else        return InsertRangeTest(X, LoBound, HiBound, DivIsSigned, false, ICI);    case ICmpInst::ICMP_ULT:    case ICmpInst::ICMP_SLT:      if (LoOverflow == +1)   // Low bound is greater than input range. -      return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); +      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));      if (LoOverflow == -1)   // Low bound is less than input range. -      return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); +      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));      return new ICmpInst(Pred, X, LoBound);    case ICmpInst::ICMP_UGT:    case ICmpInst::ICMP_SGT:      if (HiOverflow == +1)       // High bound greater than input range. -      return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); +      return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));      else if (HiOverflow == -1)  // High bound less than input range. 
-      return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); +      return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));      if (Pred == ICmpInst::ICMP_UGT)        return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);      else @@ -6682,7 +6693,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          NewRHS.zext(SrcBits);          NewRHS |= KnownOne;          return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), -                            Context->getConstantInt(NewRHS)); +                            ConstantInt::get(*Context, NewRHS));        }      }      break; @@ -6699,7 +6710,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          // the operation, just stop using the Xor.          if (!XorCST->getValue().isNegative()) {            ICI.setOperand(0, CompareVal); -          AddToWorkList(LHSI); +          Worklist.Add(LHSI);            return &ICI;          } @@ -6711,10 +6722,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          if (isTrueIfPositive)            return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal, -                              SubOne(RHS, Context)); +                              SubOne(RHS));          else            return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal, -                              AddOne(RHS, Context)); +                              AddOne(RHS));        }        if (LHSI->hasOneUse()) { @@ -6725,7 +6736,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,                                           ? 
ICI.getUnsignedPredicate()                                           : ICI.getSignedPredicate();            return new ICmpInst(Pred, LHSI->getOperand(0), -                              Context->getConstantInt(RHSV ^ SignBit)); +                              ConstantInt::get(*Context, RHSV ^ SignBit));          }          // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A) @@ -6736,7 +6747,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,                                           : ICI.getSignedPredicate();            Pred = ICI.getSwappedPredicate(Pred);            return new ICmpInst(Pred, LHSI->getOperand(0), -                              Context->getConstantInt(RHSV ^ NotSignBit)); +                              ConstantInt::get(*Context, RHSV ^ NotSignBit));          }        }      } @@ -6763,12 +6774,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,            NewCST.zext(BitWidth);            APInt NewCI = RHSV;            NewCI.zext(BitWidth); -          Instruction *NewAnd =  -            BinaryOperator::CreateAnd(Cast->getOperand(0), -                               Context->getConstantInt(NewCST),LHSI->getName()); -          InsertNewInstBefore(NewAnd, ICI); +          Value *NewAnd =  +            Builder->CreateAnd(Cast->getOperand(0), +                           ConstantInt::get(*Context, NewCST), LHSI->getName());            return new ICmpInst(ICI.getPredicate(), NewAnd, -                              Context->getConstantInt(NewCI)); +                              ConstantInt::get(*Context, NewCI));          }        } @@ -6805,32 +6815,31 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          if (CanFold) {            Constant *NewCst;            if (Shift->getOpcode() == Instruction::Shl) -            NewCst = Context->getConstantExprLShr(RHS, ShAmt); +            NewCst = ConstantExpr::getLShr(RHS, ShAmt);            else -            NewCst = 
Context->getConstantExprShl(RHS, ShAmt); +            NewCst = ConstantExpr::getShl(RHS, ShAmt);            // Check to see if we are shifting out any of the bits being            // compared. -          if (Context->getConstantExpr(Shift->getOpcode(), +          if (ConstantExpr::get(Shift->getOpcode(),                                         NewCst, ShAmt) != RHS) {              // If we shifted bits out, the fold is not going to work out.              // As a special case, check to see if this means that the              // result is always true or false now.              if (ICI.getPredicate() == ICmpInst::ICMP_EQ) -              return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); +              return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));              if (ICI.getPredicate() == ICmpInst::ICMP_NE) -              return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); +              return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));            } else {              ICI.setOperand(1, NewCst);              Constant *NewAndCST;              if (Shift->getOpcode() == Instruction::Shl) -              NewAndCST = Context->getConstantExprLShr(AndCST, ShAmt); +              NewAndCST = ConstantExpr::getLShr(AndCST, ShAmt);              else -              NewAndCST = Context->getConstantExprShl(AndCST, ShAmt); +              NewAndCST = ConstantExpr::getShl(AndCST, ShAmt);              LHSI->setOperand(1, NewAndCST);              LHSI->setOperand(0, Shift->getOperand(0)); -            AddToWorkList(Shift); // Shift is dead. -            AddUsesToWorkList(ICI); +            Worklist.Add(Shift); // Shift is dead.              return &ICI;            }          } @@ -6845,19 +6854,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          // Compute C << Y.          
Value *NS;          if (Shift->getOpcode() == Instruction::LShr) { -          NS = BinaryOperator::CreateShl(AndCST,  -                                         Shift->getOperand(1), "tmp"); +          NS = Builder->CreateShl(AndCST, Shift->getOperand(1), "tmp");          } else {            // Insert a logical shift. -          NS = BinaryOperator::CreateLShr(AndCST, -                                          Shift->getOperand(1), "tmp"); +          NS = Builder->CreateLShr(AndCST, Shift->getOperand(1), "tmp");          } -        InsertNewInstBefore(cast<Instruction>(NS), ICI);          // Compute X & (C << Y). -        Instruction *NewAnd =  -          BinaryOperator::CreateAnd(Shift->getOperand(0), NS, LHSI->getName()); -        InsertNewInstBefore(NewAnd, ICI); +        Value *NewAnd =  +          Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());          ICI.setOperand(0, NewAnd);          return &ICI; @@ -6881,11 +6886,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,        // If we are comparing against bits always shifted out, the        // comparison cannot succeed.        Constant *Comp = -        Context->getConstantExprShl(Context->getConstantExprLShr(RHS, ShAmt), +        ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),                                                                   ShAmt);        if (Comp != RHS) {// Comparing against a bit that we know is zero.          bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; -        Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE); +        Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);          return ReplaceInstUsesWith(ICI, Cst);        } @@ -6893,15 +6898,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          // Otherwise strength reduce the shift into an and.          
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);          Constant *Mask = -          Context->getConstantInt(APInt::getLowBitsSet(TypeBits,  +          ConstantInt::get(*Context, APInt::getLowBitsSet(TypeBits,                                                          TypeBits-ShAmtVal)); -        Instruction *AndI = -          BinaryOperator::CreateAnd(LHSI->getOperand(0), -                                    Mask, LHSI->getName()+".mask"); -        Value *And = InsertNewInstBefore(AndI, ICI); +        Value *And = +          Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");          return new ICmpInst(ICI.getPredicate(), And, -                            Context->getConstantInt(RHSV.lshr(ShAmtVal))); +                            ConstantInt::get(*Context, RHSV.lshr(ShAmtVal)));        }      } @@ -6910,15 +6913,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,      if (LHSI->hasOneUse() &&          isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {        // (X << 31) <s 0  --> (X&1) != 0 -      Constant *Mask = Context->getConstantInt(APInt(TypeBits, 1) << +      Constant *Mask = ConstantInt::get(*Context, APInt(TypeBits, 1) <<                                             (TypeBits-ShAmt->getZExtValue()-1)); -      Instruction *AndI = -        BinaryOperator::CreateAnd(LHSI->getOperand(0), -                                  Mask, LHSI->getName()+".mask"); -      Value *And = InsertNewInstBefore(AndI, ICI); -       +      Value *And = +        Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");        return new ICmpInst(TrueIfSigned ? 
ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, -                          And, Context->getNullValue(And->getType())); +                          And, Constant::getNullValue(And->getType()));      }      break;    } @@ -6948,7 +6948,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,      if (Comp != RHSV) { // Comparing against a bit that we know is zero.        bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE; -      Constant *Cst = Context->getConstantInt(Type::Int1Ty, IsICMP_NE); +      Constant *Cst = ConstantInt::get(Type::getInt1Ty(*Context), IsICMP_NE);        return ReplaceInstUsesWith(ICI, Cst);      } @@ -6959,20 +6959,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          MaskedValueIsZero(LHSI->getOperand(0),                             APInt::getLowBitsSet(Comp.getBitWidth(), ShAmtVal))) {        return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), -                          Context->getConstantExprShl(RHS, ShAmt)); +                          ConstantExpr::getShl(RHS, ShAmt));      }      if (LHSI->hasOneUse()) {        // Otherwise strength reduce the shift into an and.        
APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); -      Constant *Mask = Context->getConstantInt(Val); +      Constant *Mask = ConstantInt::get(*Context, Val); -      Instruction *AndI = -        BinaryOperator::CreateAnd(LHSI->getOperand(0), -                                  Mask, LHSI->getName()+".mask"); -      Value *And = InsertNewInstBefore(AndI, ICI); +      Value *And = Builder->CreateAnd(LHSI->getOperand(0), +                                      Mask, LHSI->getName()+".mask");        return new ICmpInst(ICI.getPredicate(), And, -                          Context->getConstantExprShl(RHS, ShAmt)); +                          ConstantExpr::getShl(RHS, ShAmt));      }      break;    } @@ -7005,18 +7003,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,        if (ICI.isSignedPredicate()) {          if (CR.getLower().isSignBit()) {            return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0), -                              Context->getConstantInt(CR.getUpper())); +                              ConstantInt::get(*Context, CR.getUpper()));          } else if (CR.getUpper().isSignBit()) {            return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0), -                              Context->getConstantInt(CR.getLower())); +                              ConstantInt::get(*Context, CR.getLower()));          }        } else {          if (CR.getLower().isMinValue()) {            return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), -                              Context->getConstantInt(CR.getUpper())); +                              ConstantInt::get(*Context, CR.getUpper()));          } else if (CR.getUpper().isMinValue()) {            return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), -                              Context->getConstantInt(CR.getLower())); +                              ConstantInt::get(*Context, CR.getLower()));          }        }      } @@ -7036,12 +7034,11 @@ 
Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){            const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();            if (V.sgt(APInt(V.getBitWidth(), 1)) && V.isPowerOf2()) { -            Instruction *NewRem = -              BinaryOperator::CreateURem(BO->getOperand(0), BO->getOperand(1), -                                         BO->getName()); -            InsertNewInstBefore(NewRem, ICI); -            return new ICmpInst(ICI.getPredicate(), NewRem,  -                                Context->getNullValue(BO->getType())); +            Value *NewRem = +              Builder->CreateURem(BO->getOperand(0), BO->getOperand(1), +                                  BO->getName()); +            return new ICmpInst(ICI.getPredicate(), NewRem, +                                Constant::getNullValue(BO->getType()));            }          }          break; @@ -7050,19 +7047,18 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {            if (BO->hasOneUse())              return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), -                                Context->getConstantExprSub(RHS, BOp1C)); +                                ConstantExpr::getSub(RHS, BOp1C));          } else if (RHSV == 0) {            // Replace ((add A, B) != 0) with (A != -B) if A or B is            // efficiently invertible, or if the add has just this one use.            
Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1); -          if (Value *NegVal = dyn_castNegVal(BOp1, Context)) +          if (Value *NegVal = dyn_castNegVal(BOp1))              return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); -          else if (Value *NegVal = dyn_castNegVal(BOp0, Context)) +          else if (Value *NegVal = dyn_castNegVal(BOp0))              return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);            else if (BO->hasOneUse()) { -            Instruction *Neg = BinaryOperator::CreateNeg(BOp1); -            InsertNewInstBefore(Neg, ICI); +            Value *Neg = Builder->CreateNeg(BOp1);              Neg->takeName(BO);              return new ICmpInst(ICI.getPredicate(), BOp0, Neg);            } @@ -7073,7 +7069,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          // the explicit xor.          if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))            return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),  -                              Context->getConstantExprXor(RHS, BOC)); +                              ConstantExpr::getXor(RHS, BOC));          // FALLTHROUGH        case Instruction::Sub: @@ -7087,10 +7083,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,          // If bits are being or'd in that are not present in the constant we          // are comparing against, then the comparison could never succeed!          
if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) { -          Constant *NotCI = Context->getConstantExprNot(RHS); -          if (!Context->getConstantExprAnd(BOC, NotCI)->isNullValue()) +          Constant *NotCI = ConstantExpr::getNot(RHS); +          if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())              return ReplaceInstUsesWith(ICI, -                                       Context->getConstantInt(Type::Int1Ty,  +                                       ConstantInt::get(Type::getInt1Ty(*Context),                                          isICMP_NE));          }          break; @@ -7101,19 +7097,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,            // comparison can never succeed!            if ((RHSV & ~BOC->getValue()) != 0)              return ReplaceInstUsesWith(ICI, -                                       Context->getConstantInt(Type::Int1Ty, +                                       ConstantInt::get(Type::getInt1Ty(*Context),                                         isICMP_NE));            // If we have ((X & C) == C), turn it into ((X & C) != 0).            if (RHS == BOC && RHSV.isPowerOf2())              return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :                                  ICmpInst::ICMP_NE, LHSI, -                                Context->getNullValue(RHS->getType())); +                                Constant::getNullValue(RHS->getType()));            // Replace (and X, (1 << size(X)-1) != 0) with x s< 0            if (BOC->getValue().isSignBit()) {              Value *X = BO->getOperand(0); -            Constant *Zero = Context->getNullValue(X->getType()); +            Constant *Zero = Constant::getNullValue(X->getType());              ICmpInst::Predicate pred = isICMP_NE ?                 
ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;              return new ICmpInst(pred, X, Zero); @@ -7122,7 +7118,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,            // ((X & ~7) == 0) --> X < 8            if (RHSV == 0 && isHighOnes(BOC)) {              Value *X = BO->getOperand(0); -            Constant *NegX = Context->getConstantExprNeg(BOC); +            Constant *NegX = ConstantExpr::getNeg(BOC);              ICmpInst::Predicate pred = isICMP_NE ?                 ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;              return new ICmpInst(pred, X, NegX); @@ -7133,9 +7129,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,      } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {        // Handle icmp {eq|ne} <intrinsic>, intcst.        if (II->getIntrinsicID() == Intrinsic::bswap) { -        AddToWorkList(II); +        Worklist.Add(II);          ICI.setOperand(0, II->getOperand(1)); -        ICI.setOperand(1, Context->getConstantInt(RHSV.byteSwap())); +        ICI.setOperand(1, ConstantInt::get(*Context, RHSV.byteSwap()));          return &ICI;        }      } @@ -7155,17 +7151,17 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {    // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the     // integer type is the same size as the pointer type. 
-  if (LHSCI->getOpcode() == Instruction::PtrToInt && -      getTargetData().getPointerSizeInBits() ==  +  if (TD && LHSCI->getOpcode() == Instruction::PtrToInt && +      TD->getPointerSizeInBits() ==           cast<IntegerType>(DestTy)->getBitWidth()) {      Value *RHSOp = 0;      if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) { -      RHSOp = Context->getConstantExprIntToPtr(RHSC, SrcTy); +      RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);      } else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {        RHSOp = RHSC->getOperand(0);        // If the pointer types don't match, insert a bitcast.        if (LHSCIOp->getType() != RHSOp->getType()) -        RHSOp = InsertBitCastBefore(RHSOp, LHSCIOp->getType(), ICI); +        RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType());      }      if (RHSOp) @@ -7212,8 +7208,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {    // Compute the constant that would happen if we truncated to SrcTy then    // reextended to DestTy. -  Constant *Res1 = Context->getConstantExprTrunc(CI, SrcTy); -  Constant *Res2 = Context->getConstantExprCast(LHSCI->getOpcode(), +  Constant *Res1 = ConstantExpr::getTrunc(CI, SrcTy); +  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(),                                                  Res1, DestTy);    // If the re-extended constant didn't change... @@ -7239,9 +7235,9 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {    // First, handle some easy cases. 
We know the result cannot be equal at this    // point so handle the ICI.isEquality() cases    if (ICI.getPredicate() == ICmpInst::ICMP_EQ) -    return ReplaceInstUsesWith(ICI, Context->getConstantIntFalse()); +    return ReplaceInstUsesWith(ICI, ConstantInt::getFalse(*Context));    if (ICI.getPredicate() == ICmpInst::ICMP_NE) -    return ReplaceInstUsesWith(ICI, Context->getConstantIntTrue()); +    return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(*Context));    // Evaluate the comparison for LT (we invert for GT below). LE and GE cases    // should have been folded away previously and not enter in here. @@ -7249,20 +7245,19 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {    if (isSignedCmp) {      // We're performing a signed comparison.      if (cast<ConstantInt>(CI)->getValue().isNegative()) -      Result = Context->getConstantIntFalse();          // X < (small) --> false +      Result = ConstantInt::getFalse(*Context);          // X < (small) --> false      else -      Result = Context->getConstantIntTrue();           // X < (large) --> true +      Result = ConstantInt::getTrue(*Context);           // X < (large) --> true    } else {      // We're performing an unsigned comparison.      if (isSignedExt) {        // We're performing an unsigned comp with a sign extended value.        // This is true if the input is >= 0. [aka >s -1] -      Constant *NegOne = Context->getConstantIntAllOnesValue(SrcTy); -      Result = InsertNewInstBefore(new ICmpInst(ICmpInst::ICMP_SGT, LHSCIOp, -                                   NegOne, ICI.getName()), ICI); +      Constant *NegOne = Constant::getAllOnesValue(SrcTy); +      Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICI.getName());      } else {        // Unsigned extend & unsigned compare -> always true. 
-      Result = Context->getConstantIntTrue(); +      Result = ConstantInt::getTrue(*Context);      }    } @@ -7275,7 +7270,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {            ICI.getPredicate()==ICmpInst::ICMP_SGT) &&           "ICmp should be folded!");    if (Constant *CI = dyn_cast<Constant>(Result)) -    return ReplaceInstUsesWith(ICI, Context->getConstantExprNot(CI)); +    return ReplaceInstUsesWith(ICI, ConstantExpr::getNot(CI));    return BinaryOperator::CreateNot(Result);  } @@ -7317,21 +7312,21 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {    // shl X, 0 == X and shr X, 0 == X    // shl 0, X == 0 and shr 0, X == 0 -  if (Op1 == Context->getNullValue(Op1->getType()) || -      Op0 == Context->getNullValue(Op0->getType())) +  if (Op1 == Constant::getNullValue(Op1->getType()) || +      Op0 == Constant::getNullValue(Op0->getType()))      return ReplaceInstUsesWith(I, Op0);    if (isa<UndefValue>(Op0)) {                  if (I.getOpcode() == Instruction::AShr) // undef >>s X -> undef        return ReplaceInstUsesWith(I, Op0);      else                                    // undef << X -> 0, undef >>u X -> 0 -      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));    }    if (isa<UndefValue>(Op1)) {      if (I.getOpcode() == Instruction::AShr)  // X >>s undef -> X        return ReplaceInstUsesWith(I, Op0);                else                                     // X << undef, X >>u undef -> 0 -      return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +      return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));    }    // See if we can fold away this shift. 
@@ -7363,9 +7358,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,    //    if (Op1->uge(TypeBits)) {      if (I.getOpcode() != Instruction::AShr) -      return ReplaceInstUsesWith(I, Context->getNullValue(Op0->getType())); +      return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));      else { -      I.setOperand(1, Context->getConstantInt(I.getType(), TypeBits-1)); +      I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));        return &I;      }    } @@ -7375,7 +7370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,      if (BO->getOpcode() == Instruction::Mul && isLeftShift)        if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))          return BinaryOperator::CreateMul(BO->getOperand(0), -                                        Context->getConstantExprShl(BOOp, Op1)); +                                        ConstantExpr::getShl(BOOp, Op1));    // Try to fold constant and into select arguments.    if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) @@ -7396,10 +7391,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,      if (TrOp && I.isLogicalShift() && TrOp->isShift() &&           isa<ConstantInt>(TrOp->getOperand(1))) {        // Okay, we'll do this xform.  Make the shift of shift. -      Constant *ShAmt = Context->getConstantExprZExt(Op1, TrOp->getType()); -      Instruction *NSh = BinaryOperator::Create(I.getOpcode(), TrOp, ShAmt, -                                                I.getName()); -      InsertNewInstBefore(NSh, I); // (shift2 (shift1 & 0x00FF), c2) +      Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); +      // (shift2 (shift1 & 0x00FF), c2) +      Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());        // For logical shifts, the truncation has the effect of making the high        // part of the register be zeros.  
Emulate this by inserting an AND to @@ -7420,10 +7414,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,          MaskV = MaskV.lshr(Op1->getZExtValue());        } -      Instruction *And = -        BinaryOperator::CreateAnd(NSh, Context->getConstantInt(MaskV),  -                                  TI->getName()); -      InsertNewInstBefore(And, I); // shift1 & 0x00FF +      // shift1 & 0x00FF +      Value *And = Builder->CreateAnd(NSh, ConstantInt::get(*Context, MaskV), +                                      TI->getName());        // Return the value truncated to the interesting size.        return new TruncInst(And, I.getType()); @@ -7444,17 +7437,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,            // These operators commute.            // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)            if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() && -              match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))){ -            Instruction *YS = BinaryOperator::CreateShl( -                                            Op0BO->getOperand(0), Op1, -                                            Op0BO->getName()); -            InsertNewInstBefore(YS, I); // (Y << C) -            Instruction *X =  -              BinaryOperator::Create(Op0BO->getOpcode(), YS, V1, -                                     Op0BO->getOperand(1)->getName()); -            InsertNewInstBefore(X, I);  // (X + (Y << C)) +              match(Op0BO->getOperand(1), m_Shr(m_Value(V1), +                    m_Specific(Op1)))) { +            Value *YS =         // (Y << C) +              Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); +            // (X + (Y << C)) +            Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, +                                            Op0BO->getOperand(1)->getName());              uint32_t Op1Val = Op1->getLimitedValue(TypeBits); -            return 
BinaryOperator::CreateAnd(X, Context->getConstantInt( +            return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,                         APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));            } @@ -7465,16 +7456,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,                      m_And(m_Shr(m_Value(V1), m_Specific(Op1)),                            m_ConstantInt(CC))) &&                cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) { -            Instruction *YS = BinaryOperator::CreateShl( -                                                     Op0BO->getOperand(0), Op1, -                                                     Op0BO->getName()); -            InsertNewInstBefore(YS, I); // (Y << C) -            Instruction *XM = -              BinaryOperator::CreateAnd(V1, -                                        Context->getConstantExprShl(CC, Op1), -                                        V1->getName()+".mask"); -            InsertNewInstBefore(XM, I); // X & (CC << C) -             +            Value *YS =   // (Y << C) +              Builder->CreateShl(Op0BO->getOperand(0), Op1, +                                           Op0BO->getName()); +            // X & (CC << C) +            Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), +                                           V1->getName()+".mask");              return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);            }          } @@ -7483,17 +7470,15 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,          case Instruction::Sub: {            // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)            if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() && -              match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))){ -            Instruction *YS = BinaryOperator::CreateShl( -                                                     Op0BO->getOperand(1), Op1, - 
                                                    Op0BO->getName()); -            InsertNewInstBefore(YS, I); // (Y << C) -            Instruction *X = -              BinaryOperator::Create(Op0BO->getOpcode(), V1, YS, -                                     Op0BO->getOperand(0)->getName()); -            InsertNewInstBefore(X, I);  // (X + (Y << C)) +              match(Op0BO->getOperand(0), m_Shr(m_Value(V1), +                    m_Specific(Op1)))) { +            Value *YS =  // (Y << C) +              Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); +            // (X + (Y << C)) +            Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, +                                            Op0BO->getOperand(0)->getName());              uint32_t Op1Val = Op1->getLimitedValue(TypeBits); -            return BinaryOperator::CreateAnd(X, Context->getConstantInt( +            return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context,                         APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));            } @@ -7504,15 +7489,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,                            m_ConstantInt(CC))) && V2 == Op1 &&                cast<BinaryOperator>(Op0BO->getOperand(0))                    ->getOperand(0)->hasOneUse()) { -            Instruction *YS = BinaryOperator::CreateShl( -                                                     Op0BO->getOperand(1), Op1, -                                                     Op0BO->getName()); -            InsertNewInstBefore(YS, I); // (Y << C) -            Instruction *XM = -              BinaryOperator::CreateAnd(V1,  -                                        Context->getConstantExprShl(CC, Op1), -                                        V1->getName()+".mask"); -            InsertNewInstBefore(XM, I); // X & (CC << C) +            Value *YS = // (Y << C) +              Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); +       
     // X & (CC << C) +            Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), +                                           V1->getName()+".mask");              return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);            } @@ -7552,11 +7533,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,            isValid = Op0C->getValue()[TypeBits-1] == highBitSet;          if (isValid) { -          Constant *NewRHS = Context->getConstantExpr(I.getOpcode(), Op0C, Op1); +          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); -          Instruction *NewShift = -            BinaryOperator::Create(I.getOpcode(), Op0BO->getOperand(0), Op1); -          InsertNewInstBefore(NewShift, I); +          Value *NewShift = +            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);            NewShift->takeName(Op0BO);            return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, @@ -7589,31 +7569,33 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,        // saturates.        if (AmtSum >= TypeBits) {          if (I.getOpcode() != Instruction::AShr) -          return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));          AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.        
}        return BinaryOperator::Create(I.getOpcode(), X, -                                    Context->getConstantInt(Ty, AmtSum)); -    } else if (ShiftOp->getOpcode() == Instruction::LShr && -               I.getOpcode() == Instruction::AShr) { +                                    ConstantInt::get(Ty, AmtSum)); +    } +     +    if (ShiftOp->getOpcode() == Instruction::LShr && +        I.getOpcode() == Instruction::AShr) {        if (AmtSum >= TypeBits) -        return ReplaceInstUsesWith(I, Context->getNullValue(I.getType())); +        return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));        // ((X >>u C1) >>s C2) -> (X >>u (C1+C2))  since C1 != 0. -      return BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, AmtSum)); -    } else if (ShiftOp->getOpcode() == Instruction::AShr && -               I.getOpcode() == Instruction::LShr) { +      return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum)); +    } +     +    if (ShiftOp->getOpcode() == Instruction::AShr && +        I.getOpcode() == Instruction::LShr) {        // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.        if (AmtSum >= TypeBits)          AmtSum = TypeBits-1; -      Instruction *Shift = -        BinaryOperator::CreateAShr(X, Context->getConstantInt(Ty, AmtSum)); -      InsertNewInstBefore(Shift, I); +      Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); -      return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); +      return BinaryOperator::CreateAnd(Shift, ConstantInt::get(*Context, Mask));      }      // Okay, if we get here, one shift must be left, and the other shift must be @@ -7622,12 +7604,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,        // If we have ((X >>? C) << C), turn this into X & (-1 << C).        
if (I.getOpcode() == Instruction::Shl) {          APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1)); -        return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask)); +        return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));        }        // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).        if (I.getOpcode() == Instruction::LShr) {          APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1)); -        return BinaryOperator::CreateAnd(X, Context->getConstantInt(Mask)); +        return BinaryOperator::CreateAnd(X, ConstantInt::get(*Context, Mask));        }        // We can simplify ((X << C) >>s C) into a trunc + sext.        // NOTE: we could do this for any C, but that would make 'unusual' integer @@ -7641,15 +7623,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,        case 32 :        case 64 :        case 128: -        SExtType = Context->getIntegerType(Ty->getBitWidth() - ShiftAmt1); +        SExtType = IntegerType::get(*Context, Ty->getBitWidth() - ShiftAmt1);          break;        default: break;        } -      if (SExtType) { -        Instruction *NewTrunc = new TruncInst(X, SExtType, "sext"); -        InsertNewInstBefore(NewTrunc, I); -        return new SExtInst(NewTrunc, Ty); -      } +      if (SExtType) +        return new SExtInst(Builder->CreateTrunc(X, SExtType, "sext"), Ty);        // Otherwise, we can't handle it yet.      
} else if (ShiftAmt1 < ShiftAmt2) {        uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1; @@ -7658,23 +7637,21 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,        if (I.getOpcode() == Instruction::Shl) {          assert(ShiftOp->getOpcode() == Instruction::LShr ||                 ShiftOp->getOpcode() == Instruction::AShr); -        Instruction *Shift = -          BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff)); -        InsertNewInstBefore(Shift, I); +        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));          APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); -        return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); +        return BinaryOperator::CreateAnd(Shift, +                                         ConstantInt::get(*Context, Mask));        }        // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)        if (I.getOpcode() == Instruction::LShr) {          assert(ShiftOp->getOpcode() == Instruction::Shl); -        Instruction *Shift = -          BinaryOperator::CreateLShr(X, Context->getConstantInt(Ty, ShiftDiff)); -        InsertNewInstBefore(Shift, I); +        Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));          APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); -        return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); +        return BinaryOperator::CreateAnd(Shift, +                                         ConstantInt::get(*Context, Mask));        }        // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. 
@@ -7686,24 +7663,22 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,        if (I.getOpcode() == Instruction::Shl) {          assert(ShiftOp->getOpcode() == Instruction::LShr ||                 ShiftOp->getOpcode() == Instruction::AShr); -        Instruction *Shift = -          BinaryOperator::Create(ShiftOp->getOpcode(), X, -                                 Context->getConstantInt(Ty, ShiftDiff)); -        InsertNewInstBefore(Shift, I); +        Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X, +                                            ConstantInt::get(Ty, ShiftDiff));          APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2)); -        return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); +        return BinaryOperator::CreateAnd(Shift, +                                         ConstantInt::get(*Context, Mask));        }        // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)        if (I.getOpcode() == Instruction::LShr) {          assert(ShiftOp->getOpcode() == Instruction::Shl); -        Instruction *Shift = -          BinaryOperator::CreateShl(X, Context->getConstantInt(Ty, ShiftDiff)); -        InsertNewInstBefore(Shift, I); +        Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));          APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2)); -        return BinaryOperator::CreateAnd(Shift, Context->getConstantInt(Mask)); +        return BinaryOperator::CreateAnd(Shift, +                                         ConstantInt::get(*Context, Mask));        }        // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in. @@ -7718,12 +7693,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,  /// X*Scale+Offset.  
///  static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, -                                        int &Offset, LLVMContext* Context) { -  assert(Val->getType() == Type::Int32Ty && "Unexpected allocation size type!"); +                                        int &Offset, LLVMContext *Context) { +  assert(Val->getType() == Type::getInt32Ty(*Context) &&  +         "Unexpected allocation size type!");    if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {      Offset = CI->getZExtValue();      Scale  = 0; -    return Context->getConstantInt(Type::Int32Ty, 0); +    return ConstantInt::get(Type::getInt32Ty(*Context), 0);    } else if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {      if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {        if (I->getOpcode() == Instruction::Shl) { @@ -7763,6 +7739,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,                                                     AllocationInst &AI) {    const PointerType *PTy = cast<PointerType>(CI.getType()); +  BuilderTy AllocaBuilder(*Builder); +  AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); +      // Remove any uses of AI that are dead.    assert(!CI.use_empty() && "Dead instructions should be removed earlier!"); @@ -7773,11 +7752,14 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,          ++UI; // If this instruction uses AI more than once, don't break UI.        ++NumDeadInst; -      DOUT << "IC: DCE: " << *User; +      DEBUG(errs() << "IC: DCE: " << *User << '\n');        EraseInstFromFunction(*User);      }    } -   + +  // This requires TargetData to get the alloca alignment and size information. +  if (!TD) return 0; +    // Get the type really allocated and the type casted to.    
const Type *AllocElTy = AI.getAllocatedType();    const Type *CastElTy = PTy->getElementType(); @@ -7816,30 +7798,22 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,    if (Scale == 1) {      Amt = NumElements;    } else { -    // If the allocation size is constant, form a constant mul expression -    Amt = Context->getConstantInt(Type::Int32Ty, Scale); -    if (isa<ConstantInt>(NumElements)) -      Amt = Context->getConstantExprMul(cast<ConstantInt>(NumElements), -                                 cast<ConstantInt>(Amt)); -    // otherwise multiply the amount and the number of elements -    else { -      Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp"); -      Amt = InsertNewInstBefore(Tmp, AI); -    } +    Amt = ConstantInt::get(Type::getInt32Ty(*Context), Scale); +    // Insert before the alloca, not before the cast. +    Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");    }    if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) { -    Value *Off = Context->getConstantInt(Type::Int32Ty, Offset, true); -    Instruction *Tmp = BinaryOperator::CreateAdd(Amt, Off, "tmp"); -    Amt = InsertNewInstBefore(Tmp, AI); +    Value *Off = ConstantInt::get(Type::getInt32Ty(*Context), Offset, true); +    Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");    }    AllocationInst *New;    if (isa<MallocInst>(AI)) -    New = new MallocInst(CastElTy, Amt, AI.getAlignment()); +    New = AllocaBuilder.CreateMalloc(CastElTy, Amt);    else -    New = new AllocaInst(CastElTy, Amt, AI.getAlignment()); -  InsertNewInstBefore(New, AI); +    New = AllocaBuilder.CreateAlloca(CastElTy, Amt); +  New->setAlignment(AI.getAlignment());    New->takeName(&AI);    // If the allocation has one real use plus a dbg.declare, just remove the @@ -7851,11 +7825,9 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,    // things that used it to use the new cast.  This will also hack on CI, but it    // will die soon.    
else if (!AI.hasOneUse()) { -    AddUsesToWorkList(AI);      // New is the allocation instruction, pointer typed. AI is the original      // allocation instruction, also pointer typed. Thus, cast to use is BitCast. -    CastInst *NewCast = new BitCastInst(New, AI.getType(), "tmpcast"); -    InsertNewInstBefore(NewCast, AI); +    Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast");      AI.replaceAllUsesWith(NewCast);    }    return ReplaceInstUsesWith(CI, New); @@ -7923,6 +7895,23 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,             CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc,                                        NumCastsRemoved); +  case Instruction::UDiv: +  case Instruction::URem: { +    // UDiv and URem can be truncated if all the truncated bits are zero. +    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); +    uint32_t BitWidth = Ty->getScalarSizeInBits(); +    if (BitWidth < OrigBitWidth) { +      APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); +      if (MaskedValueIsZero(I->getOperand(0), Mask) && +          MaskedValueIsZero(I->getOperand(1), Mask)) { +        return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc, +                                          NumCastsRemoved) && +               CanEvaluateInDifferentType(I->getOperand(1), Ty, CastOpc, +                                          NumCastsRemoved); +      } +    } +    break; +  }    case Instruction::Shl:      // If we are truncating the result of this SHL, and if it's a shift of a      // constant amount, we can always perform a SHL in a smaller type. 
@@ -7993,7 +7982,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,  Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,                                                bool isSigned) {    if (Constant *C = dyn_cast<Constant>(V)) -    return Context->getConstantExprIntegerCast(C, Ty, +    return ConstantExpr::getIntegerCast(C, Ty,                                                 isSigned /*Sext or ZExt*/);    // Otherwise, it must be an instruction. @@ -8009,7 +7998,9 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,    case Instruction::Xor:    case Instruction::AShr:    case Instruction::LShr: -  case Instruction::Shl: { +  case Instruction::Shl: +  case Instruction::UDiv: +  case Instruction::URem: {      Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);      Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);      Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); @@ -8046,7 +8037,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,    }    default:       // TODO: Can handle more cases here. -    assert(0 && "Unreachable!"); +    llvm_unreachable("Unreachable!");      break;    } @@ -8089,13 +8080,14 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {  static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,                                          SmallVectorImpl<Value*> &NewIndices,                                         const TargetData *TD, -                                       LLVMContext* Context) { +                                       LLVMContext *Context) { +  if (!TD) return 0;    if (!Ty->isSized()) return 0;    // Start with the index over the outer type.  
Note that the type size    // might be zero (even if the offset isn't zero) if the indexed type    // is something like [0 x {int, int}] -  const Type *IntPtrTy = TD->getIntPtrType(); +  const Type *IntPtrTy = TD->getIntPtrType(*Context);    int64_t FirstIdx = 0;    if (int64_t TySize = TD->getTypeAllocSize(Ty)) {      FirstIdx = Offset/TySize; @@ -8110,7 +8102,7 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,      assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");    } -  NewIndices.push_back(Context->getConstantInt(IntPtrTy, FirstIdx)); +  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));    // Index into the types.  If we fail, set OrigBase to null.    while (Offset) { @@ -8124,14 +8116,14 @@ static const Type *FindElementAtOffset(const Type *Ty, int64_t Offset,               "Offset must stay within the indexed type");        unsigned Elt = SL->getElementContainingOffset(Offset); -      NewIndices.push_back(Context->getConstantInt(Type::Int32Ty, Elt)); +      NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Elt));        Offset -= SL->getElementOffset(Elt);        Ty = STy->getElementType(Elt);      } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {        uint64_t EltSize = TD->getTypeAllocSize(AT->getElementType());        assert(EltSize && "Cannot index into a zero-sized array"); -      NewIndices.push_back(Context->getConstantInt(IntPtrTy,Offset/EltSize)); +      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));        Offset %= EltSize;        Ty = AT->getElementType();      } else { @@ -8154,7 +8146,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {        // Changing the cast operand is usually not a good idea but it is safe        // here because the pointer operand is being replaced with another         // pointer operand so the opcode doesn't need to change. 
-      AddToWorkList(GEP); +      Worklist.Add(GEP);        CI.setOperand(0, GEP->getOperand(0));        return &CI;      } @@ -8163,7 +8155,7 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {      // GEP computes a constant offset, see if we can convert these three      // instructions into fewer.  This typically happens with unions and other      // non-type-safe code. -    if (GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) { +    if (TD && GEP->hasOneUse() && isa<BitCastInst>(GEP->getOperand(0))) {        if (GEP->hasAllConstantIndices()) {          // We are guaranteed to get a constant from EmitGEPOffset.          ConstantInt *OffsetV = @@ -8179,10 +8171,10 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {            // If we were able to index down into an element, create the GEP            // and bitcast the result.  This eliminates one bitcast, potentially            // two. -          Instruction *NGEP = GetElementPtrInst::Create(OrigBase,  -                                                        NewIndices.begin(), -                                                        NewIndices.end(), ""); -          InsertNewInstBefore(NGEP, CI); +          Value *NGEP = cast<GEPOperator>(GEP)->isInBounds() ? +            Builder->CreateInBoundsGEP(OrigBase, +                                       NewIndices.begin(), NewIndices.end()) : +            Builder->CreateGEP(OrigBase, NewIndices.begin(), NewIndices.end());            NGEP->takeName(GEP);            if (isa<BitCastInst>(CI)) @@ -8214,10 +8206,8 @@ static bool isSafeIntegerType(const Type *Ty) {    }  } -/// Only the TRUNC, ZEXT, SEXT, and BITCAST can both operand and result as -/// integer types. This function implements the common transforms for all those -/// cases. 
-/// @brief Implement the transforms common to CastInst with integer operands +/// commonIntCastTransforms - This function implements the common transforms +/// for trunc, zext, and sext.  Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {    if (Instruction *Result = commonCastTransforms(CI))      return Result; @@ -8241,11 +8231,10 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {    // Attempt to propagate the cast into the instruction for int->int casts.    int NumCastsRemoved = 0; -  if (!isa<BitCastInst>(CI) && -      // Only do this if the dest type is a simple type, don't convert the -      // expression tree to something weird like i93 unless the source is also -      // strange. -      (isSafeIntegerType(DestTy->getScalarType()) || +  // Only do this if the dest type is a simple type, don't convert the +  // expression tree to something weird like i93 unless the source is also +  // strange. +  if ((isSafeIntegerType(DestTy->getScalarType()) ||         !isSafeIntegerType(SrcI->getType()->getScalarType())) &&        CanEvaluateInDifferentType(SrcI, DestTy,                                   CI.getOpcode(), NumCastsRemoved)) { @@ -8261,7 +8250,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {      default:        // All the others use floating point so we shouldn't actually         // get here because of the check above. 
-      assert(0 && "Unknown cast type"); +      llvm_unreachable("Unknown cast type");      case Instruction::Trunc:        DoXForm = true;        break; @@ -8307,8 +8296,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {      }      if (DoXForm) { -      DOUT << "ICE: EvaluateInDifferentType converting expression type to avoid" -           << " cast: " << CI; +      DEBUG(errs() << "ICE: EvaluateInDifferentType converting expression type" +            " to avoid cast: " << CI);        Value *Res = EvaluateInDifferentType(SrcI, DestTy,                                              CI.getOpcode() == Instruction::SExt);        if (JustReplace) @@ -8317,9 +8306,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {        assert(Res->getType() == DestTy);        switch (CI.getOpcode()) { -      default: assert(0 && "Unknown cast type!"); +      default: llvm_unreachable("Unknown cast type!");        case Instruction::Trunc: -      case Instruction::BitCast:          // Just replace this cast with the result.          return ReplaceInstUsesWith(CI, Res);        case Instruction::ZExt: { @@ -8332,8 +8320,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {            return ReplaceInstUsesWith(CI, Res);          // We need to emit an AND to clear the high bits. -        Constant *C = Context->getConstantInt(APInt::getLowBitsSet(DestBitSize, -                                                            SrcBitSize)); +        Constant *C = ConstantInt::get(*Context,  +                                 APInt::getLowBitsSet(DestBitSize, SrcBitSize));          return BinaryOperator::CreateAnd(Res, C);        }        case Instruction::SExt: { @@ -8344,9 +8332,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {            return ReplaceInstUsesWith(CI, Res);          // We need to emit a cast to truncate, then a cast to sext. 
-        return CastInst::Create(Instruction::SExt, -            InsertCastBefore(Instruction::Trunc, Res, Src->getType(),  -                             CI), DestTy); +        return new SExtInst(Builder->CreateTrunc(Res, Src->getType()), DestTy);        }        }      } @@ -8362,16 +8348,12 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {    case Instruction::Or:    case Instruction::Xor:      // If we are discarding information, rewrite. -    if (DestBitSize <= SrcBitSize && DestBitSize != 1) { -      // Don't insert two casts if they cannot be eliminated.  We allow  -      // two casts to be inserted if the sizes are the same.  This could  -      // only be converting signedness, which is a noop. -      if (DestBitSize == SrcBitSize ||  -          !ValueRequiresCast(CI.getOpcode(), Op1, DestTy,TD) || +    if (DestBitSize < SrcBitSize && DestBitSize != 1) { +      // Don't insert two casts unless at least one can be eliminated. +      if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) ||            !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { -        Instruction::CastOps opcode = CI.getOpcode(); -        Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI); -        Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI); +        Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); +        Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());          return BinaryOperator::Create(              cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c);        } @@ -8380,62 +8362,25 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {      // cast (xor bool X, true) to int  --> xor (cast bool X to int), 1      if (isa<ZExtInst>(CI) && SrcBitSize == 1 &&           SrcI->getOpcode() == Instruction::Xor && -        Op1 == Context->getConstantIntTrue() && +        Op1 == ConstantInt::getTrue(*Context) &&          (!Op0->hasOneUse() || !isa<CmpInst>(Op0))) { -      Value *New = 
InsertCastBefore(Instruction::ZExt, Op0, DestTy, CI); +      Value *New = Builder->CreateZExt(Op0, DestTy, Op0->getName());        return BinaryOperator::CreateXor(New, -                                      Context->getConstantInt(CI.getType(), 1)); -    } -    break; -  case Instruction::SDiv: -  case Instruction::UDiv: -  case Instruction::SRem: -  case Instruction::URem: -    // If we are just changing the sign, rewrite. -    if (DestBitSize == SrcBitSize) { -      // Don't insert two casts if they cannot be eliminated.  We allow  -      // two casts to be inserted if the sizes are the same.  This could  -      // only be converting signedness, which is a noop. -      if (!ValueRequiresCast(CI.getOpcode(), Op1, DestTy, TD) ||  -          !ValueRequiresCast(CI.getOpcode(), Op0, DestTy, TD)) { -        Value *Op0c = InsertCastBefore(Instruction::BitCast,  -                                       Op0, DestTy, *SrcI); -        Value *Op1c = InsertCastBefore(Instruction::BitCast,  -                                       Op1, DestTy, *SrcI); -        return BinaryOperator::Create( -          cast<BinaryOperator>(SrcI)->getOpcode(), Op0c, Op1c); -      } +                                      ConstantInt::get(CI.getType(), 1));      }      break; -  case Instruction::Shl: -    // Allow changing the sign of the source operand.  Do not allow  -    // changing the size of the shift, UNLESS the shift amount is a  -    // constant.  We must not change variable sized shifts to a smaller  -    // size, because it is undefined to shift more bits out than exist  -    // in the value. -    if (DestBitSize == SrcBitSize || -        (DestBitSize < SrcBitSize && isa<Constant>(Op1))) { -      Instruction::CastOps opcode = (DestBitSize == SrcBitSize ? 
-          Instruction::BitCast : Instruction::Trunc); -      Value *Op0c = InsertCastBefore(opcode, Op0, DestTy, *SrcI); -      Value *Op1c = InsertCastBefore(opcode, Op1, DestTy, *SrcI); +  case Instruction::Shl: { +    // Canonicalize trunc inside shl, if we can. +    ConstantInt *CI = dyn_cast<ConstantInt>(Op1); +    if (CI && DestBitSize < SrcBitSize && +        CI->getLimitedValue(DestBitSize) < DestBitSize) { +      Value *Op0c = Builder->CreateTrunc(Op0, DestTy, Op0->getName()); +      Value *Op1c = Builder->CreateTrunc(Op1, DestTy, Op1->getName());        return BinaryOperator::CreateShl(Op0c, Op1c);      }      break; -  case Instruction::AShr: -    // If this is a signed shr, and if all bits shifted in are about to be -    // truncated off, turn it into an unsigned shr to allow greater -    // simplifications. -    if (DestBitSize < SrcBitSize && -        isa<ConstantInt>(Op1)) { -      uint32_t ShiftAmt = cast<ConstantInt>(Op1)->getLimitedValue(SrcBitSize); -      if (SrcBitSize > ShiftAmt && SrcBitSize-ShiftAmt >= DestBitSize) { -        // Insert the new logical shift right. 
-        return BinaryOperator::CreateLShr(Op0, Op1); -      } -    } -    break; +  }    }    return 0;  } @@ -8450,11 +8395,10 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {    uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();    // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0) -  if (DestBitWidth == 1 && -      isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) { -    Constant *One = Context->getConstantInt(Src->getType(), 1); -    Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI); -    Value *Zero = Context->getNullValue(Src->getType()); +  if (DestBitWidth == 1) { +    Constant *One = ConstantInt::get(Src->getType(), 1); +    Src = Builder->CreateAnd(Src, One, "tmp"); +    Value *Zero = Constant::getNullValue(Src->getType());      return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);    } @@ -8469,12 +8413,12 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {      APInt Mask(APInt::getLowBitsSet(SrcBitWidth, ShAmt).shl(DestBitWidth));      if (MaskedValueIsZero(ShiftOp, Mask)) {        if (ShAmt >= DestBitWidth)        // All zeros. -        return ReplaceInstUsesWith(CI, Context->getNullValue(Ty)); +        return ReplaceInstUsesWith(CI, Constant::getNullValue(Ty));        // Okay, we can shrink this.  Truncate the input, then return a new        // shift. 
-      Value *V1 = InsertCastBefore(Instruction::Trunc, ShiftOp, Ty, CI); -      Value *V2 = Context->getConstantExprTrunc(ShAmtV, Ty); +      Value *V1 = Builder->CreateTrunc(ShiftOp, Ty, ShiftOp->getName()); +      Value *V2 = ConstantExpr::getTrunc(ShAmtV, Ty);        return BinaryOperator::CreateLShr(V1, V2);      }    } @@ -8499,20 +8443,15 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,        if (!DoXform) return ICI;        Value *In = ICI->getOperand(0); -      Value *Sh = Context->getConstantInt(In->getType(), +      Value *Sh = ConstantInt::get(In->getType(),                                     In->getType()->getScalarSizeInBits()-1); -      In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh, -                                                        In->getName()+".lobit"), -                               CI); +      In = Builder->CreateLShr(In, Sh, In->getName()+".lobit");        if (In->getType() != CI.getType()) -        In = CastInst::CreateIntegerCast(In, CI.getType(), -                                         false/*ZExt*/, "tmp", &CI); +        In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/, "tmp");        if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { -        Constant *One = Context->getConstantInt(In->getType(), 1); -        In = InsertNewInstBefore(BinaryOperator::CreateXor(In, One, -                                                         In->getName()+".not"), -                                 CI); +        Constant *One = ConstantInt::get(In->getType(), 1); +        In = Builder->CreateXor(In, One, In->getName()+".not");        }        return ReplaceInstUsesWith(CI, In); @@ -8545,8 +8484,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,          if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {            // (X&4) == 2 --> false            // (X&4) != 2 --> true -          Constant *Res = Context->getConstantInt(Type::Int1Ty, isNE); -          Res = 
Context->getConstantExprZExt(Res, CI.getType()); +          Constant *Res = ConstantInt::get(Type::getInt1Ty(*Context), isNE); +          Res = ConstantExpr::getZExt(Res, CI.getType());            return ReplaceInstUsesWith(CI, Res);          } @@ -8555,15 +8494,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,          if (ShiftAmt) {            // Perform a logical shr by shiftamt.            // Insert the shift to put the result in the low bit. -          In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, -                              Context->getConstantInt(In->getType(), ShiftAmt), -                                                   In->getName()+".lobit"), CI); +          In = Builder->CreateLShr(In, ConstantInt::get(In->getType(),ShiftAmt), +                                   In->getName()+".lobit");          }          if ((Op1CV != 0) == isNE) { // Toggle the low bit. -          Constant *One = Context->getConstantInt(In->getType(), 1); -          In = BinaryOperator::CreateXor(In, One, "tmp"); -          InsertNewInstBefore(cast<Instruction>(In), CI); +          Constant *One = ConstantInt::get(In->getType(), 1); +          In = Builder->CreateXor(In, One, "tmp");          }          if (CI.getType() == In->getType()) @@ -8600,21 +8537,21 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {      // SrcSize  > DstSize: trunc(a) & mask      if (SrcSize < DstSize) {        APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); -      Constant *AndConst = Context->getConstantInt(A->getType(), AndValue); -      Instruction *And = -        BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask"); -      InsertNewInstBefore(And, CI); +      Constant *AndConst = ConstantInt::get(A->getType(), AndValue); +      Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask");        return new ZExtInst(And, CI.getType()); -    } else if (SrcSize == DstSize) { +    } +     +    if (SrcSize == DstSize) {        
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); -      return BinaryOperator::CreateAnd(A, Context->getConstantInt(A->getType(), +      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),                                                             AndValue)); -    } else if (SrcSize > DstSize) { -      Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp"); -      InsertNewInstBefore(Trunc, CI); +    } +    if (SrcSize > DstSize) { +      Value *Trunc = Builder->CreateTrunc(A, CI.getType(), "tmp");        APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));        return BinaryOperator::CreateAnd(Trunc,  -                                       Context->getConstantInt(Trunc->getType(), +                                       ConstantInt::get(Trunc->getType(),                                                                 AndValue));      }    } @@ -8631,8 +8568,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {      if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&          (transformZExtICmp(LHS, CI, false) ||           transformZExtICmp(RHS, CI, false))) { -      Value *LCast = InsertCastBefore(Instruction::ZExt, LHS, CI.getType(), CI); -      Value *RCast = InsertCastBefore(Instruction::ZExt, RHS, CI.getType(), CI); +      Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); +      Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());        return BinaryOperator::Create(Instruction::Or, LCast, RCast);      }    } @@ -8645,7 +8582,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {          if (TI0->getType() == CI.getType())            return              BinaryOperator::CreateAnd(TI0, -                                Context->getConstantExprZExt(C, CI.getType())); +                                ConstantExpr::getZExt(C, CI.getType()));        }    // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)). 
@@ -8657,9 +8594,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {            if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {              Value *TI0 = TI->getOperand(0);              if (TI0->getType() == CI.getType()) { -              Constant *ZC = Context->getConstantExprZExt(C, CI.getType()); -              Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp"); -              InsertNewInstBefore(NewAnd, *And); +              Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); +              Value *NewAnd = Builder->CreateAnd(TI0, ZC, "tmp");                return BinaryOperator::CreateXor(NewAnd, ZC);              }            } @@ -8674,14 +8610,14 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {    Value *Src = CI.getOperand(0);    // Canonicalize sign-extend from i1 to a select. -  if (Src->getType() == Type::Int1Ty) +  if (Src->getType() == Type::getInt1Ty(*Context))      return SelectInst::Create(Src, -                              Context->getConstantIntAllOnesValue(CI.getType()), -                              Context->getNullValue(CI.getType())); +                              Constant::getAllOnesValue(CI.getType()), +                              Constant::getNullValue(CI.getType()));    // See if the value being truncated is already sign extended.  If so, just    // eliminate the trunc/sext pair. 
-  if (getOpcode(Src) == Instruction::Trunc) { +  if (Operator::getOpcode(Src) == Instruction::Trunc) {      Value *Op = cast<User>(Src)->getOperand(0);      unsigned OpBits   = Op->getType()->getScalarSizeInBits();      unsigned MidBits  = Src->getType()->getScalarSizeInBits(); @@ -8729,9 +8665,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {        unsigned MidSize = Src->getType()->getScalarSizeInBits();        unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();        unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; -      Constant *ShAmtV = Context->getConstantInt(CI.getType(), ShAmt); -      I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV, -                                                        CI.getName()), CI); +      Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); +      I = Builder->CreateShl(I, ShAmtV, CI.getName());        return BinaryOperator::CreateAShr(I, ShAmtV);      }    } @@ -8742,18 +8677,18 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {  /// FitsInFPType - Return a Constant* for the specified FP constant if it fits  /// in the specified FP type without changing its value.  static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem, -                              LLVMContext* Context) { +                              LLVMContext *Context) {    bool losesInfo;    APFloat F = CFP->getValueAPF();    (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);    if (!losesInfo) -    return Context->getConstantFP(F); +    return ConstantFP::get(*Context, F);    return 0;  }  /// LookThroughFPExtensions - If this is an fp extension instruction, look  /// through it until we get the source value. 
-static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) { +static Value *LookThroughFPExtensions(Value *V, LLVMContext *Context) {    if (Instruction *I = dyn_cast<Instruction>(V))      if (I->getOpcode() == Instruction::FPExt)        return LookThroughFPExtensions(I->getOperand(0), Context); @@ -8762,12 +8697,12 @@ static Value *LookThroughFPExtensions(Value *V, LLVMContext* Context) {    // that can accurately represent it.  This allows us to turn    // (float)((double)X+2.0) into x+2.0f.    if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { -    if (CFP->getType() == Type::PPC_FP128Ty) +    if (CFP->getType() == Type::getPPC_FP128Ty(*Context))        return V;  // No constant folding of this.      // See if the value can be truncated to float and then reextended.      if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle, Context))        return V; -    if (CFP->getType() == Type::DoubleTy) +    if (CFP->getType() == Type::getDoubleTy(*Context))        return V;  // Won't shrink.      if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble, Context))        return V; @@ -8804,10 +8739,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {          // the cast, do this xform.          if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&              RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) { -          LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc, -                                      CI.getType(), CI); -          RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc, -                                      CI.getType(), CI); +          LHSTrunc = Builder->CreateFPExt(LHSTrunc, CI.getType()); +          RHSTrunc = Builder->CreateFPExt(RHSTrunc, CI.getType());            return BinaryOperator::Create(OpI->getOpcode(), LHSTrunc, RHSTrunc);          }        } @@ -8875,10 +8808,11 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {    // trunc to be exposed to other transforms.  
Don't do this for extending    // ptrtoint's, because we don't know if the target sign or zero extends its    // pointers. -  if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { -    Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0), -                                                    TD->getIntPtrType(), -                                                    "tmp"), CI); +  if (TD && +      CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) { +    Value *P = Builder->CreatePtrToInt(CI.getOperand(0), +                                       TD->getIntPtrType(CI.getContext()), +                                       "tmp");      return new TruncInst(P, CI.getType());    } @@ -8891,65 +8825,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {    // allows the trunc to be exposed to other transforms.  Don't do this for    // extending inttoptr's, because we don't know if the target sign or zero    // extends to pointers. -  if (CI.getOperand(0)->getType()->getScalarSizeInBits() > +  if (TD && CI.getOperand(0)->getType()->getScalarSizeInBits() >        TD->getPointerSizeInBits()) { -    Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0), -                                                 TD->getIntPtrType(), -                                                 "tmp"), CI); +    Value *P = Builder->CreateTrunc(CI.getOperand(0), +                                    TD->getIntPtrType(CI.getContext()), "tmp");      return new IntToPtrInst(P, CI.getType());    }    if (Instruction *I = commonCastTransforms(CI))      return I; -   -  const Type *DestPointee = cast<PointerType>(CI.getType())->getElementType(); -  if (!DestPointee->isSized()) return 0; - -  // If this is inttoptr(add (ptrtoint x), cst), try to turn this into a GEP. 
-  ConstantInt *Cst; -  Value *X; -  if (match(CI.getOperand(0), m_Add(m_Cast<PtrToIntInst>(m_Value(X)), -                                    m_ConstantInt(Cst)))) { -    // If the source and destination operands have the same type, see if this -    // is a single-index GEP. -    if (X->getType() == CI.getType()) { -      // Get the size of the pointee type. -      uint64_t Size = TD->getTypeAllocSize(DestPointee); - -      // Convert the constant to intptr type. -      APInt Offset = Cst->getValue(); -      Offset.sextOrTrunc(TD->getPointerSizeInBits()); - -      // If Offset is evenly divisible by Size, we can do this xform. -      if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){ -        Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size)); -        return GetElementPtrInst::Create(X, Context->getConstantInt(Offset)); -      } -    } -    // TODO: Could handle other cases, e.g. where add is indexing into field of -    // struct etc. -  } else if (CI.getOperand(0)->hasOneUse() && -             match(CI.getOperand(0), m_Add(m_Value(X), m_ConstantInt(Cst)))) { -    // Otherwise, if this is inttoptr(add x, cst), try to turn this into an -    // "inttoptr+GEP" instead of "add+intptr". -     -    // Get the size of the pointee type. -    uint64_t Size = TD->getTypeAllocSize(DestPointee); -     -    // Convert the constant to intptr type. -    APInt Offset = Cst->getValue(); -    Offset.sextOrTrunc(TD->getPointerSizeInBits()); -     -    // If Offset is evenly divisible by Size, we can do this xform. 
-    if (Size && !APIntOps::srem(Offset, APInt(Offset.getBitWidth(), Size))){ -      Offset = APIntOps::sdiv(Offset, APInt(Offset.getBitWidth(), Size)); -       -      Instruction *P = InsertNewInstBefore(new IntToPtrInst(X, CI.getType(), -                                                            "tmp"), CI); -      return GetElementPtrInst::Create(P, -                                       Context->getConstantInt(Offset), "tmp"); -    } -  } +    return 0;  } @@ -8960,10 +8845,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {    const Type *SrcTy = Src->getType();    const Type *DestTy = CI.getType(); -  if (SrcTy->isInteger() && DestTy->isInteger()) { -    if (Instruction *Result = commonIntCastTransforms(CI)) -      return Result; -  } else if (isa<PointerType>(SrcTy)) { +  if (isa<PointerType>(SrcTy)) {      if (Instruction *I = commonPointerCastTransforms(CI))        return I;    } else { @@ -8987,8 +8869,10 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {      if (SrcPTy->getAddressSpace() != DstPTy->getAddressSpace())        return 0; -    // If we are casting a malloc or alloca to a pointer to a type of the same +    // If we are casting a alloca to a pointer to a type of the same      // size, rewrite the allocation instruction to allocate the "right" type. +    // There is no need to modify malloc calls because it is their bitcast that +    // needs to be cleaned up.      if (AllocationInst *AI = dyn_cast<AllocationInst>(Src))        if (Instruction *V = PromoteCastOfAllocation(CI, *AI))          return V; @@ -8996,7 +8880,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {      // If the source and destination are pointers, and this cast is equivalent      // to a getelementptr X, 0, 0, 0...  turn it into the appropriate gep.      // This can enhance SROA and other transforms that want type-safe pointers. 
-    Constant *ZeroUInt = Context->getNullValue(Type::Int32Ty); +    Constant *ZeroUInt = Constant::getNullValue(Type::getInt32Ty(*Context));      unsigned NumZeros = 0;      while (SrcElTy != DstElTy &&              isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) && @@ -9008,8 +8892,30 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {      // If we found a path from the src to dest, create the getelementptr now.      if (SrcElTy == DstElTy) {        SmallVector<Value*, 8> Idxs(NumZeros+1, ZeroUInt); -      return GetElementPtrInst::Create(Src, Idxs.begin(), Idxs.end(), "",  -                                       ((Instruction*) NULL)); +      return GetElementPtrInst::CreateInBounds(Src, Idxs.begin(), Idxs.end(), "", +                                               ((Instruction*) NULL)); +    } +  } + +  if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { +    if (DestVTy->getNumElements() == 1) { +      if (!isa<VectorType>(SrcTy)) { +        Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); +        return InsertElementInst::Create(UndefValue::get(DestTy), Elem, +                            Constant::getNullValue(Type::getInt32Ty(*Context))); +      } +      // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) +    } +  } + +  if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) { +    if (SrcVTy->getNumElements() == 1) { +      if (!isa<VectorType>(DestTy)) { +        Value *Elem =  +          Builder->CreateExtractElement(Src, +                            Constant::getNullValue(Type::getInt32Ty(*Context))); +        return CastInst::Create(Instruction::BitCast, Elem, DestTy); +      }      }    } @@ -9030,10 +8936,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {               Tmp->getOperand(0)->getType() == DestTy) ||              ((Tmp = dyn_cast<CastInst>(SVI->getOperand(1))) &&                Tmp->getOperand(0)->getType() == DestTy)) { -          Value *LHS = 
InsertCastBefore(Instruction::BitCast, -                                        SVI->getOperand(0), DestTy, CI); -          Value *RHS = InsertCastBefore(Instruction::BitCast, -                                        SVI->getOperand(1), DestTy, CI); +          Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); +          Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy);            // Return a new shuffle vector.  Use the same element ID's, as we            // know the vector types match #elts.            return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); @@ -9076,9 +8980,9 @@ static unsigned GetSelectFoldableOperands(Instruction *I) {  /// GetSelectFoldableConstant - For the same transformation as the previous  /// function, return the identity constant that goes into the select.  static Constant *GetSelectFoldableConstant(Instruction *I, -                                           LLVMContext* Context) { +                                           LLVMContext *Context) {    switch (I->getOpcode()) { -  default: assert(0 && "This cannot happen!"); abort(); +  default: llvm_unreachable("This cannot happen!");    case Instruction::Add:    case Instruction::Sub:    case Instruction::Or: @@ -9086,11 +8990,11 @@ static Constant *GetSelectFoldableConstant(Instruction *I,    case Instruction::Shl:    case Instruction::LShr:    case Instruction::AShr: -    return Context->getNullValue(I->getType()); +    return Constant::getNullValue(I->getType());    case Instruction::And: -    return Context->getAllOnesValue(I->getType()); +    return Constant::getAllOnesValue(I->getType());    case Instruction::Mul: -    return Context->getConstantInt(I->getType(), 1); +    return ConstantInt::get(I->getType(), 1);    }  } @@ -9110,7 +9014,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,      // Fold this by inserting a select from the input values.      
SelectInst *NewSI = SelectInst::Create(SI.getCondition(), TI->getOperand(0), -                                           FI->getOperand(0), SI.getName()+".v"); +                                          FI->getOperand(0), SI.getName()+".v");      InsertNewInstBefore(NewSI, SI);      return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,                               TI->getType()); @@ -9160,7 +9064,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,      else        return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);    } -  assert(0 && "Shouldn't get here"); +  llvm_unreachable("Shouldn't get here");    return 0;  } @@ -9202,7 +9106,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,              NewSel->takeName(TVI);              if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TVI))                return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); -            assert(0 && "Unknown instruction!!"); +            llvm_unreachable("Unknown instruction!!");            }          }        } @@ -9231,7 +9135,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,              NewSel->takeName(FVI);              if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FVI))                return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); -            assert(0 && "Unknown instruction!!"); +            llvm_unreachable("Unknown instruction!!");            }          }        } @@ -9266,7 +9170,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,          if (CI->isMinValue(Pred == ICmpInst::ICMP_SLT))            return ReplaceInstUsesWith(SI, FalseVal);          // X < C ? X : C-1  -->  X > C-1 ? 
C-1 : X -        Constant *AdjustedRHS = SubOne(CI, Context); +        Constant *AdjustedRHS = SubOne(CI);          if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||              (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {            Pred = ICmpInst::getSwappedPredicate(Pred); @@ -9286,7 +9190,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,          if (CI->isMaxValue(Pred == ICmpInst::ICMP_SGT))            return ReplaceInstUsesWith(SI, FalseVal);          // X > C ? X : C+1  -->  X < C+1 ? C+1 : X -        Constant *AdjustedRHS = AddOne(CI, Context); +        Constant *AdjustedRHS = AddOne(CI);          if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||              (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {            Pred = ICmpInst::getSwappedPredicate(Pred); @@ -9323,10 +9227,10 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,          if ((Pred == ICmpInst::ICMP_SLT && Op1CV == 0) ||              (Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {            Value *In = ICI->getOperand(0); -          Value *Sh = Context->getConstantInt(In->getType(), +          Value *Sh = ConstantInt::get(In->getType(),                                         In->getType()->getScalarSizeInBits()-1);            In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh, -                                                          In->getName()+".lobit"), +                                                        In->getName()+".lobit"),                                     *ICI);            if (In->getType() != SI.getType())              In = CastInst::CreateIntegerCast(In, SI.getType(), @@ -9365,6 +9269,14 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,    return Changed ? &SI : 0;  } +/// isDefinedInBB - Return true if the value is an instruction defined in the +/// specified basicblock. 
+static bool isDefinedInBB(const Value *V, const BasicBlock *BB) { +  const Instruction *I = dyn_cast<Instruction>(V); +  return I != 0 && I->getParent() == BB; +} + +  Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {    Value *CondVal = SI.getCondition();    Value *TrueVal = SI.getTrueValue(); @@ -9390,7 +9302,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {        return ReplaceInstUsesWith(SI, FalseVal);    } -  if (SI.getType() == Type::Int1Ty) { +  if (SI.getType() == Type::getInt1Ty(*Context)) {      if (ConstantInt *C = dyn_cast<ConstantInt>(TrueVal)) {        if (C->getZExtValue()) {          // Change: A = select B, true, C --> A = or B, C @@ -9438,26 +9350,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {        }        if (ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition())) { - -        // (x <s 0) ? -1 : 0 -> ashr x, 31 -        if (TrueValC->isAllOnesValue() && FalseValC->isZero()) -          if (ConstantInt *CmpCst = dyn_cast<ConstantInt>(IC->getOperand(1))) { -            if (IC->getPredicate() == ICmpInst::ICMP_SLT && CmpCst->isZero()) { -              // The comparison constant and the result are not neccessarily the -              // same width. Make an all-ones value by inserting a AShr. -              Value *X = IC->getOperand(0); -              uint32_t Bits = X->getType()->getScalarSizeInBits(); -              Constant *ShAmt = Context->getConstantInt(X->getType(), Bits-1); -              Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X, -                                                        ShAmt, "ones"); -              InsertNewInstBefore(SRA, SI); - -              // Then cast to the appropriate width. 
-              return CastInst::CreateIntegerCast(SRA, SI.getType(), true); -            } -          } - -          // If one of the constants is zero (we know they can't both be) and we          // have an icmp instruction with zero, and we have an 'and' with the          // non-constant value, eliminate this whole mess.  This corresponds to @@ -9568,10 +9460,11 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {              //        select C, (add X, Y), (sub X, Z)              Value *NegVal;  // Compute -Z              if (Constant *C = dyn_cast<Constant>(SubOp->getOperand(1))) { -              NegVal = Context->getConstantExprNeg(C); +              NegVal = ConstantExpr::getNeg(C);              } else {                NegVal = InsertNewInstBefore( -                    BinaryOperator::CreateNeg(SubOp->getOperand(1), "tmp"), SI); +                    BinaryOperator::CreateNeg(SubOp->getOperand(1), +                                              "tmp"), SI);              }              Value *NewTrueOp = OtherAddOp; @@ -9595,6 +9488,17 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {        return FoldI;    } +  // See if we can fold the select into a phi node.  The true/false values have +  // to be live in the predecessor blocks.  If they are instructions in SI's +  // block, we can't map to the predecessor. 
+  if (isa<PHINode>(SI.getCondition()) && +      (!isDefinedInBB(SI.getTrueValue(), SI.getParent()) || +       isa<PHINode>(SI.getTrueValue())) && +      (!isDefinedInBB(SI.getFalseValue(), SI.getParent()) || +       isa<PHINode>(SI.getFalseValue()))) +    if (Instruction *NV = FoldOpIntoPhi(SI)) +      return NV; +    if (BinaryOperator::isNot(CondVal)) {      SI.setOperand(0, BinaryOperator::getNotArgument(CondVal));      SI.setOperand(1, FalseVal); @@ -9617,7 +9521,7 @@ static unsigned EnforceKnownAlignment(Value *V,    User *U = dyn_cast<User>(V);    if (!U) return Align; -  switch (getOpcode(U)) { +  switch (Operator::getOpcode(U)) {    default: break;    case Instruction::BitCast:      return EnforceKnownAlignment(U->getOperand(0), Align, PrefAlign); @@ -9650,16 +9554,13 @@ static unsigned EnforceKnownAlignment(Value *V,          Align = PrefAlign;        }      } -  } else if (AllocationInst *AI = dyn_cast<AllocationInst>(V)) { -    // If there is a requested alignment and if this is an alloca, round up.  We -    // don't do this for malloc, because some systems can't respect the request. -    if (isa<AllocaInst>(AI)) { -      if (AI->getAlignment() >= PrefAlign) -        Align = AI->getAlignment(); -      else { -        AI->setAlignment(PrefAlign); -        Align = PrefAlign; -      } +  } else if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) { +    // If there is a requested alignment and if this is an alloca, round up. 
+    if (AI->getAlignment() >= PrefAlign) +      Align = AI->getAlignment(); +    else { +      AI->setAlignment(PrefAlign); +      Align = PrefAlign;      }    } @@ -9694,7 +9595,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {    unsigned CopyAlign = MI->getAlignment();    if (CopyAlign < MinAlign) { -    MI->setAlignment(MinAlign); +    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),  +                                             MinAlign, false));      return MI;    } @@ -9715,7 +9617,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {    // Use an integer load+store unless we can find something better.    Type *NewPtrTy = -                Context->getPointerTypeUnqual(Context->getIntegerType(Size<<3)); +                PointerType::getUnqual(IntegerType::get(*Context, Size<<3));    // Memcpy forces the use of i8* for the source and destination.  That means    // that if you're using memcpy to move one double around, you'll get a cast @@ -9725,7 +9627,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {    // integer datatype.    if (Value *Op = getBitCastOperand(MI->getOperand(1))) {      const Type *SrcETy = cast<PointerType>(Op->getType())->getElementType(); -    if (SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { +    if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {        // The SrcETy might be something like {{{double}}} or [1 x double].  Rip        // down through these levels if so.        
while (!SrcETy->isSingleValueType()) { @@ -9744,7 +9646,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {        }        if (SrcETy->isSingleValueType()) -        NewPtrTy = Context->getPointerTypeUnqual(SrcETy); +        NewPtrTy = PointerType::getUnqual(SrcETy);      }    } @@ -9754,28 +9656,29 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {    SrcAlign = std::max(SrcAlign, CopyAlign);    DstAlign = std::max(DstAlign, CopyAlign); -  Value *Src = InsertBitCastBefore(MI->getOperand(2), NewPtrTy, *MI); -  Value *Dest = InsertBitCastBefore(MI->getOperand(1), NewPtrTy, *MI); +  Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewPtrTy); +  Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewPtrTy);    Instruction *L = new LoadInst(Src, "tmp", false, SrcAlign);    InsertNewInstBefore(L, *MI);    InsertNewInstBefore(new StoreInst(L, Dest, false, DstAlign), *MI);    // Set the size of the copy to 0, it will be deleted on the next iteration. -  MI->setOperand(3, Context->getNullValue(MemOpLength->getType())); +  MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));    return MI;  }  Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {    unsigned Alignment = GetOrEnforceKnownAlignment(MI->getDest());    if (MI->getAlignment() < Alignment) { -    MI->setAlignment(Alignment); +    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), +                                             Alignment, false));      return MI;    }    // Extract the length and alignment and fill if they are constant.    
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());    ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue()); -  if (!LenC || !FillC || FillC->getType() != Type::Int8Ty) +  if (!LenC || !FillC || FillC->getType() != Type::getInt8Ty(*Context))      return 0;    uint64_t Len = LenC->getZExtValue();    Alignment = MI->getAlignment(); @@ -9785,21 +9688,21 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {    // memset(s,c,n) -> store s, c (for n=1,2,4,8)    if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) { -    const Type *ITy = Context->getIntegerType(Len*8);  // n=1 -> i8. +    const Type *ITy = IntegerType::get(*Context, Len*8);  // n=1 -> i8.      Value *Dest = MI->getDest(); -    Dest = InsertBitCastBefore(Dest, Context->getPointerTypeUnqual(ITy), *MI); +    Dest = Builder->CreateBitCast(Dest, PointerType::getUnqual(ITy));      // Alignment 0 is identity for alignment 1 for memset, but not store.      if (Alignment == 0) Alignment = 1;      // Extract the fill value and store.      uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; -    InsertNewInstBefore(new StoreInst(Context->getConstantInt(ITy, Fill), +    InsertNewInstBefore(new StoreInst(ConstantInt::get(ITy, Fill),                                        Dest, false, Alignment), *MI);      // Set the size of the copy to 0, it will be deleted on the next iteration. -    MI->setLength(Context->getNullValue(LenC->getType())); +    MI->setLength(Constant::getNullValue(LenC->getType()));      return MI;    } @@ -9820,8 +9723,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      return &CI;    } -   -      IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);    if (!II) return visitCallSite(&CI); @@ -9891,9 +9792,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // Turn PPC lvx     -> load if the pointer is known aligned.      // Turn X86 loadups -> load if the pointer is known aligned.      
if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) { -      Value *Ptr = InsertBitCastBefore(II->getOperand(1), -                                   Context->getPointerTypeUnqual(II->getType()), -                                       CI); +      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), +                                         PointerType::getUnqual(II->getType()));        return new LoadInst(Ptr);      }      break; @@ -9902,8 +9802,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // Turn stvx -> store if the pointer is known aligned.      if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {        const Type *OpPtrTy =  -        Context->getPointerTypeUnqual(II->getOperand(1)->getType()); -      Value *Ptr = InsertBitCastBefore(II->getOperand(2), OpPtrTy, CI); +        PointerType::getUnqual(II->getOperand(1)->getType()); +      Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);        return new StoreInst(II->getOperand(1), Ptr);      }      break; @@ -9913,8 +9813,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      // Turn X86 storeu -> store if the pointer is known aligned.      if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {        const Type *OpPtrTy =  -        Context->getPointerTypeUnqual(II->getOperand(2)->getType()); -      Value *Ptr = InsertBitCastBefore(II->getOperand(1), OpPtrTy, CI); +        PointerType::getUnqual(II->getOperand(2)->getType()); +      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);        return new StoreInst(II->getOperand(2), Ptr);      }      break; @@ -9951,9 +9851,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {        if (AllEltsOk) {          // Cast the input vectors to byte vectors. 
-        Value *Op0 =InsertBitCastBefore(II->getOperand(1),Mask->getType(),CI); -        Value *Op1 =InsertBitCastBefore(II->getOperand(2),Mask->getType(),CI); -        Value *Result = Context->getUndef(Op0->getType()); +        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType()); +        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType()); +        Value *Result = UndefValue::get(Op0->getType());          // Only extract each element once.          Value *ExtractedElts[32]; @@ -9966,16 +9866,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {            Idx &= 31;  // Match the hardware behavior.            if (ExtractedElts[Idx] == 0) { -            Instruction *Elt =  -              new ExtractElementInst(Idx < 16 ? Op0 : Op1, Idx&15, "tmp"); -            InsertNewInstBefore(Elt, CI); -            ExtractedElts[Idx] = Elt; +            ExtractedElts[Idx] =  +              Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,  +                  ConstantInt::get(Type::getInt32Ty(*Context), Idx&15, false), +                                            "tmp");            }            // Insert this value into the result vector. 
-          Result = InsertElementInst::Create(Result, ExtractedElts[Idx], -                                             i, "tmp"); -          InsertNewInstBefore(cast<Instruction>(Result), CI); +          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], +                         ConstantInt::get(Type::getInt32Ty(*Context), i, false), +                                                "tmp");          }          return CastInst::Create(Instruction::BitCast, Result, CI.getType());        } @@ -9999,7 +9899,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {      TerminatorInst *TI = II->getParent()->getTerminator();      bool CannotRemove = false;      for (++BI; &*BI != TI; ++BI) { -      if (isa<AllocaInst>(BI)) { +      if (isa<AllocaInst>(BI) || isMalloc(BI)) {          CannotRemove = true;          break;        } @@ -10055,7 +9955,7 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,    const Type* DstTy = cast<PointerType>(CI->getType())->getElementType();    if (!SrcTy->isSized() || !DstTy->isSized())      return false; -  if (TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy)) +  if (!TD || TD->getTypeAllocSize(SrcTy) != TD->getTypeAllocSize(DstTy))      return false;    return true;  } @@ -10076,11 +9976,13 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {        Instruction *OldCall = CS.getInstruction();        // If the call and callee calling conventions don't match, this call must        // be unreachable, as the call is undefined. 
-      new StoreInst(Context->getConstantIntTrue(), -                Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)),  +      new StoreInst(ConstantInt::getTrue(*Context), +                UndefValue::get(Type::getInt1PtrTy(*Context)),                                     OldCall); -      if (!OldCall->use_empty()) -        OldCall->replaceAllUsesWith(Context->getUndef(OldCall->getType())); +      // If OldCall does not return void then replaceAllUsesWith undef. +      // This allows ValueHandlers and custom metadata to adjust themselves. +      if (!OldCall->getType()->isVoidTy()) +        OldCall->replaceAllUsesWith(UndefValue::get(OldCall->getType()));        if (isa<CallInst>(OldCall))   // Not worth removing an invoke here.          return EraseInstFromFunction(*OldCall);        return 0; @@ -10090,18 +9992,20 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {      // This instruction is not reachable, just remove it.  We insert a store to      // undef so that we know that this code is not reachable, despite the fact      // that we can't modify the CFG here. -    new StoreInst(Context->getConstantIntTrue(), -               Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), +    new StoreInst(ConstantInt::getTrue(*Context), +               UndefValue::get(Type::getInt1PtrTy(*Context)),                   CS.getInstruction()); -    if (!CS.getInstruction()->use_empty()) +    // If CS does not return void then replaceAllUsesWith undef. +    // This allows ValueHandlers and custom metadata to adjust themselves. +    if (!CS.getInstruction()->getType()->isVoidTy())        CS.getInstruction()-> -        replaceAllUsesWith(Context->getUndef(CS.getInstruction()->getType())); +        replaceAllUsesWith(UndefValue::get(CS.getInstruction()->getType()));      if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {        // Don't break the CFG, insert a dummy cond branch.      
BranchInst::Create(II->getNormalDest(), II->getUnwindDest(), -                         Context->getConstantIntTrue(), II); +                         ConstantInt::getTrue(*Context), II);      }      return EraseInstFromFunction(*CS.getInstruction());    } @@ -10165,13 +10069,15 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {      if (Callee->isDeclaration() &&          // Conversion is ok if changing from one pointer type to another or from          // a pointer to an integer of the same size. -        !((isa<PointerType>(OldRetTy) || OldRetTy == TD->getIntPtrType()) && -          (isa<PointerType>(NewRetTy) || NewRetTy == TD->getIntPtrType()))) +        !((isa<PointerType>(OldRetTy) || !TD || +           OldRetTy == TD->getIntPtrType(Caller->getContext())) && +          (isa<PointerType>(NewRetTy) || !TD || +           NewRetTy == TD->getIntPtrType(Caller->getContext()))))        return false;   // Cannot transform this return value.      if (!Caller->use_empty() &&          // void -> non-void is handled specially -        NewRetTy != Type::VoidTy && !CastInst::isCastable(NewRetTy, OldRetTy)) +        !NewRetTy->isVoidTy() && !CastInst::isCastable(NewRetTy, OldRetTy))        return false;   // Cannot transform this return value.      if (!CallerPAL.isEmpty() && !Caller->use_empty()) { @@ -10212,8 +10118,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {      // Converting from one pointer type to another or between a pointer and an      // integer of the same size is safe even if we do not have a body.      
bool isConvertible = ActTy == ParamTy || -      ((isa<PointerType>(ParamTy) || ParamTy == TD->getIntPtrType()) && -       (isa<PointerType>(ActTy) || ActTy == TD->getIntPtrType())); +      (TD && ((isa<PointerType>(ParamTy) || +      ParamTy == TD->getIntPtrType(Caller->getContext())) && +              (isa<PointerType>(ActTy) || +              ActTy == TD->getIntPtrType(Caller->getContext()))));      if (Callee->isDeclaration() && !isConvertible) return false;    } @@ -10260,8 +10168,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {      } else {        Instruction::CastOps opcode = CastInst::getCastOpcode(*AI,            false, ParamTy, false); -      CastInst *NewCast = CastInst::Create(opcode, *AI, ParamTy, "tmp"); -      Args.push_back(InsertNewInstBefore(NewCast, *Caller)); +      Args.push_back(Builder->CreateCast(opcode, *AI, ParamTy, "tmp"));      }      // Add any parameter attributes. @@ -10270,26 +10177,24 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {    }    // If the function takes more arguments than the call was taking, add them -  // now... +  // now.    for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) -    Args.push_back(Context->getNullValue(FT->getParamType(i))); +    Args.push_back(Constant::getNullValue(FT->getParamType(i))); -  // If we are removing arguments to the function, emit an obnoxious warning... +  // If we are removing arguments to the function, emit an obnoxious warning.    if (FT->getNumParams() < NumActualArgs) {      if (!FT->isVarArg()) { -      cerr << "WARNING: While resolving call to function '" -           << Callee->getName() << "' arguments were dropped!\n"; +      errs() << "WARNING: While resolving call to function '" +             << Callee->getName() << "' arguments were dropped!\n";      } else { -      // Add all of the arguments in their promoted form to the arg list... +      // Add all of the arguments in their promoted form to the arg list.        
for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {          const Type *PTy = getPromotedType((*AI)->getType());          if (PTy != (*AI)->getType()) {            // Must promote to pass through va_arg area! -          Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false,  -                                                                PTy, false); -          Instruction *Cast = CastInst::Create(opcode, *AI, PTy, "tmp"); -          InsertNewInstBefore(Cast, *Caller); -          Args.push_back(Cast); +          Instruction::CastOps opcode = +            CastInst::getCastOpcode(*AI, false, PTy, false); +          Args.push_back(Builder->CreateCast(opcode, *AI, PTy, "tmp"));          } else {            Args.push_back(*AI);          } @@ -10304,10 +10209,11 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {    if (Attributes FnAttrs =  CallerPAL.getFnAttributes())      attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs)); -  if (NewRetTy == Type::VoidTy) +  if (NewRetTy->isVoidTy())      Caller->setName("");   // Void type should not have a name. -  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(),attrVec.end()); +  const AttrListPtr &NewCallerPAL = AttrListPtr::get(attrVec.begin(), +                                                     attrVec.end());    Instruction *NC;    if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { @@ -10329,7 +10235,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {    // Insert a cast of the return type as necessary.    
Value *NV = NC;    if (OldRetTy != NV->getType() && !Caller->use_empty()) { -    if (NV->getType() != Type::VoidTy) { +    if (!NV->getType()->isVoidTy()) {        Instruction::CastOps opcode = CastInst::getCastOpcode(NC, false,                                                               OldRetTy, false);        NV = NC = CastInst::Create(opcode, NC, OldRetTy, "tmp"); @@ -10343,16 +10249,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {          // Otherwise, it's a call, just insert cast right after the call instr          InsertNewInstBefore(NC, *Caller);        } -      AddUsersToWorkList(*Caller); +      Worklist.AddUsersToWorkList(*Caller);      } else { -      NV = Context->getUndef(Caller->getType()); +      NV = UndefValue::get(Caller->getType());      }    } -  if (Caller->getType() != Type::VoidTy && !Caller->use_empty()) + +  if (!Caller->use_empty())      Caller->replaceAllUsesWith(NV); -  Caller->eraseFromParent(); -  RemoveFromWorkList(Caller); +   +  EraseInstFromFunction(*Caller);    return true;  } @@ -10469,14 +10376,14 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {        // Replace the trampoline call with a direct call.  Let the generic        // code sort out any function type mismatches. -      FunctionType *NewFTy = -                       Context->getFunctionType(FTy->getReturnType(), NewTypes,  +      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,                                                   FTy->isVarArg());        Constant *NewCallee = -        NestF->getType() == Context->getPointerTypeUnqual(NewFTy) ? -        NestF : Context->getConstantExprBitCast(NestF,  -                                         Context->getPointerTypeUnqual(NewFTy)); -      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),NewAttrs.end()); +        NestF->getType() == PointerType::getUnqual(NewFTy) ? 
+        NestF : ConstantExpr::getBitCast(NestF,  +                                         PointerType::getUnqual(NewFTy)); +      const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(), +                                                   NewAttrs.end());        Instruction *NewCaller;        if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { @@ -10495,10 +10402,10 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {            setCallingConv(cast<CallInst>(Caller)->getCallingConv());          cast<CallInst>(NewCaller)->setAttributes(NewPAL);        } -      if (Caller->getType() != Type::VoidTy && !Caller->use_empty()) +      if (!Caller->getType()->isVoidTy())          Caller->replaceAllUsesWith(NewCaller);        Caller->eraseFromParent(); -      RemoveFromWorkList(Caller); +      Worklist.Remove(Caller);        return 0;      }    } @@ -10508,13 +10415,13 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {    // code sort out any function type mismatches.    Constant *NewCallee =      NestF->getType() == PTy ? NestF :  -                              Context->getConstantExprBitCast(NestF, PTy); +                              ConstantExpr::getBitCast(NestF, PTy);    CS.setCalledFunction(NewCallee);    return CS.getInstruction();  } -/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(c,d)] -/// and if a/b/c/d and the add's all have a single use, turn this into two phi's +/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] +/// and if a/b/c and the add's all have a single use, turn this into a phi  /// and a single binop.  
Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {    Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); @@ -10526,8 +10433,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {    const Type *LHSType = LHSVal->getType();    const Type *RHSType = RHSVal->getType(); -  // Scan to see if all operands are the same opcode, all have one use, and all -  // kill their operands (i.e. the operands have one use). +  // Scan to see if all operands are the same opcode, and all have one use.    for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {      Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));      if (!I || I->getOpcode() != Opc || !I->hasOneUse() || @@ -10547,6 +10453,13 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {      if (I->getOperand(0) != LHSVal) LHSVal = 0;      if (I->getOperand(1) != RHSVal) RHSVal = 0;    } + +  // If both LHS and RHS would need a PHI, don't do this transformation, +  // because it would increase the number of PHIs entering the block, +  // which leads to higher register pressure. This is especially +  // bad when the PHIs are in the header of a loop. +  if (!LHSVal && !RHSVal) +    return 0;    // Otherwise, this is safe to transform! 
@@ -10589,8 +10502,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {    if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))      return BinaryOperator::Create(BinOp->getOpcode(), LHSVal, RHSVal);    CmpInst *CIOp = cast<CmpInst>(FirstInst); -  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal, -                         RHSVal); +  return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), +                         LHSVal, RHSVal);  }  Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { @@ -10601,9 +10514,13 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {    // This is true if all GEP bases are allocas and if all indices into them are    // constants.    bool AllBasePointersAreAllocas = true; + +  // We don't want to replace this phi if the replacement would require +  // more than one phi, which leads to higher register pressure. This is +  // especially bad when the PHIs are in the header of a loop. +  bool NeededPhi = false; -  // Scan to see if all operands are the same opcode, all have one use, and all -  // kill their operands (i.e. the operands have one use). +  // Scan to see if all operands are the same opcode, and all have one use.    for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {      GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));      if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || @@ -10632,7 +10549,16 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {        if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())          return 0; + +      // If we already needed a PHI for an earlier operand, and another operand +      // also requires a PHI, we'd be introducing more PHIs than we're +      // eliminating, which increases register pressure on entry to the PHI's +      // block. 
+      if (NeededPhi) +        return 0; +        FixedOperands[op] = 0;  // Needs a PHI. +      NeededPhi = true;      }    } @@ -10678,8 +10604,11 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {    }    Value *Base = FixedOperands[0]; -  return GetElementPtrInst::Create(Base, FixedOperands.begin()+1, -                                   FixedOperands.end()); +  return cast<GEPOperator>(FirstInst)->isInBounds() ? +    GetElementPtrInst::CreateInBounds(Base, FixedOperands.begin()+1, +                                      FixedOperands.end()) : +    GetElementPtrInst::Create(Base, FixedOperands.begin()+1, +                              FixedOperands.end());  } @@ -10836,7 +10765,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {    if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(FirstInst))      return BinaryOperator::Create(BinOp->getOpcode(), PhiVal, ConstantOp);    if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) -    return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),  +    return CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),                             PhiVal, ConstantOp);    assert(isa<LoadInst>(FirstInst) && "Unknown operation"); @@ -10929,7 +10858,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {        SmallPtrSet<PHINode*, 16> PotentiallyDeadPHIs;        PotentiallyDeadPHIs.insert(&PN);        if (DeadPHICycle(PU, PotentiallyDeadPHIs)) -        return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType())); +        return ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));      }      // If this phi has a single use, and if that use just computes a value for @@ -10941,7 +10870,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {      if (PHIUser->hasOneUse() &&          (isa<BinaryOperator>(PHIUser) || isa<GetElementPtrInst>(PHIUser)) &&          PHIUser->use_back() == &PN) { -      return ReplaceInstUsesWith(PN, Context->getUndef(PN.getType())); +      return 
ReplaceInstUsesWith(PN, UndefValue::get(PN.getType()));      }    } @@ -10982,30 +10911,14 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {    return 0;  } -static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy, -                                   Instruction *InsertPoint, -                                   InstCombiner *IC) { -  unsigned PtrSize = DTy->getScalarSizeInBits(); -  unsigned VTySize = V->getType()->getScalarSizeInBits(); -  // We must cast correctly to the pointer type. Ensure that we -  // sign extend the integer value if it is smaller as this is -  // used for address computation. -  Instruction::CastOps opcode =  -     (VTySize < PtrSize ? Instruction::SExt : -      (VTySize == PtrSize ? Instruction::BitCast : Instruction::Trunc)); -  return IC->InsertCastBefore(opcode, V, DTy, *InsertPoint); -} - -  Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {    Value *PtrOp = GEP.getOperand(0); -  // Is it 'getelementptr %P, i32 0'  or 'getelementptr %P' -  // If so, eliminate the noop. +  // Eliminate 'getelementptr %P, i32 0' and 'getelementptr %P', they are noops.    if (GEP.getNumOperands() == 1)      return ReplaceInstUsesWith(GEP, PtrOp);    if (isa<UndefValue>(GEP.getOperand(0))) -    return ReplaceInstUsesWith(GEP, Context->getUndef(GEP.getType())); +    return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType()));    bool HasZeroPointerIndex = false;    if (Constant *C = dyn_cast<Constant>(GEP.getOperand(1))) @@ -11015,78 +10928,48 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {      return ReplaceInstUsesWith(GEP, PtrOp);    // Eliminate unneeded casts for indices. 
-  bool MadeChange = false; -   -  gep_type_iterator GTI = gep_type_begin(GEP); -  for (User::op_iterator i = GEP.op_begin() + 1, e = GEP.op_end(); -       i != e; ++i, ++GTI) { -    if (isa<SequentialType>(*GTI)) { -      if (CastInst *CI = dyn_cast<CastInst>(*i)) { -        if (CI->getOpcode() == Instruction::ZExt || -            CI->getOpcode() == Instruction::SExt) { -          const Type *SrcTy = CI->getOperand(0)->getType(); -          // We can eliminate a cast from i32 to i64 iff the target  -          // is a 32-bit pointer target. -          if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) { -            MadeChange = true; -            *i = CI->getOperand(0); -          } -        } -      } +  if (TD) { +    bool MadeChange = false; +    unsigned PtrSize = TD->getPointerSizeInBits(); +     +    gep_type_iterator GTI = gep_type_begin(GEP); +    for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); +         I != E; ++I, ++GTI) { +      if (!isa<SequentialType>(*GTI)) continue; +              // If we are using a wider index than needed for this platform, shrink it -      // to what we need.  If narrower, sign-extend it to what we need. -      // If the incoming value needs a cast instruction, -      // insert it.  This explicit cast can make subsequent optimizations more -      // obvious. 
-      Value *Op = *i; -      if (TD->getTypeSizeInBits(Op->getType()) > TD->getPointerSizeInBits()) { -        if (Constant *C = dyn_cast<Constant>(Op)) { -          *i = Context->getConstantExprTrunc(C, TD->getIntPtrType()); -          MadeChange = true; -        } else { -          Op = InsertCastBefore(Instruction::Trunc, Op, TD->getIntPtrType(), -                                GEP); -          *i = Op; -          MadeChange = true; -        } -      } else if (TD->getTypeSizeInBits(Op->getType()) < TD->getPointerSizeInBits()) { -        if (Constant *C = dyn_cast<Constant>(Op)) { -          *i = Context->getConstantExprSExt(C, TD->getIntPtrType()); -          MadeChange = true; -        } else { -          Op = InsertCastBefore(Instruction::SExt, Op, TD->getIntPtrType(), -                                GEP); -          *i = Op; -          MadeChange = true; -        } -      } +      // to what we need.  If narrower, sign-extend it to what we need.  This +      // explicit cast can make subsequent optimizations more obvious. +      unsigned OpBits = cast<IntegerType>((*I)->getType())->getBitWidth(); +      if (OpBits == PtrSize) +        continue; +       +      *I = Builder->CreateIntCast(*I, TD->getIntPtrType(GEP.getContext()),true); +      MadeChange = true;      } +    if (MadeChange) return &GEP;    } -  if (MadeChange) return &GEP;    // Combine Indices - If the source pointer to this getelementptr instruction    // is a getelementptr instruction, combine the indices of the two    // getelementptr instructions into a single instruction.    // -  SmallVector<Value*, 8> SrcGEPOperands; -  if (User *Src = dyn_castGetElementPtr(PtrOp)) -    SrcGEPOperands.append(Src->op_begin(), Src->op_end()); - -  if (!SrcGEPOperands.empty()) { +  if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {      // Note that if our source is a gep chain itself that we wait for that      // chain to be resolved before we perform this transformation.  
This      // avoids us creating a TON of code in some cases.      // -    if (isa<GetElementPtrInst>(SrcGEPOperands[0]) && -        cast<Instruction>(SrcGEPOperands[0])->getNumOperands() == 2) -      return 0;   // Wait until our source is folded to completion. +    if (GetElementPtrInst *SrcGEP = +          dyn_cast<GetElementPtrInst>(Src->getOperand(0))) +      if (SrcGEP->getNumOperands() == 2) +        return 0;   // Wait until our source is folded to completion.      SmallVector<Value*, 8> Indices;      // Find out whether the last index in the source GEP is a sequential idx.      bool EndsWithSequential = false; -    for (gep_type_iterator I = gep_type_begin(*cast<User>(PtrOp)), -           E = gep_type_end(*cast<User>(PtrOp)); I != E; ++I) +    for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src); +         I != E; ++I)        EndsWithSequential = !isa<StructType>(*I);      // Can we combine the two pointer arithmetics offsets? @@ -11094,98 +10977,68 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        // Replace: gep (gep %P, long B), long A, ...        // With:    T = long A+B; gep %P, T, ...        // -      Value *Sum, *SO1 = SrcGEPOperands.back(), *GO1 = GEP.getOperand(1); -      if (SO1 == Context->getNullValue(SO1->getType())) { +      Value *Sum; +      Value *SO1 = Src->getOperand(Src->getNumOperands()-1); +      Value *GO1 = GEP.getOperand(1); +      if (SO1 == Constant::getNullValue(SO1->getType())) {          Sum = GO1; -      } else if (GO1 == Context->getNullValue(GO1->getType())) { +      } else if (GO1 == Constant::getNullValue(GO1->getType())) {          Sum = SO1;        } else { -        // If they aren't the same type, convert both to an integer of the -        // target's pointer size. 
-        if (SO1->getType() != GO1->getType()) { -          if (Constant *SO1C = dyn_cast<Constant>(SO1)) { -            SO1 = -                Context->getConstantExprIntegerCast(SO1C, GO1->getType(), true); -          } else if (Constant *GO1C = dyn_cast<Constant>(GO1)) { -            GO1 = -                Context->getConstantExprIntegerCast(GO1C, SO1->getType(), true); -          } else { -            unsigned PS = TD->getPointerSizeInBits(); -            if (TD->getTypeSizeInBits(SO1->getType()) == PS) { -              // Convert GO1 to SO1's type. -              GO1 = InsertCastToIntPtrTy(GO1, SO1->getType(), &GEP, this); - -            } else if (TD->getTypeSizeInBits(GO1->getType()) == PS) { -              // Convert SO1 to GO1's type. -              SO1 = InsertCastToIntPtrTy(SO1, GO1->getType(), &GEP, this); -            } else { -              const Type *PT = TD->getIntPtrType(); -              SO1 = InsertCastToIntPtrTy(SO1, PT, &GEP, this); -              GO1 = InsertCastToIntPtrTy(GO1, PT, &GEP, this); -            } -          } -        } -        if (isa<Constant>(SO1) && isa<Constant>(GO1)) -          Sum = Context->getConstantExprAdd(cast<Constant>(SO1),  -                                            cast<Constant>(GO1)); -        else { -          Sum = BinaryOperator::CreateAdd(SO1, GO1, PtrOp->getName()+".sum"); -          InsertNewInstBefore(cast<Instruction>(Sum), GEP); -        } +        // If they aren't the same type, then the input hasn't been processed +        // by the loop above yet (which canonicalizes sequential index types to +        // intptr_t).  Just avoid transforming this until the input has been +        // normalized. +        if (SO1->getType() != GO1->getType()) +          return 0; +        Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");        } -      // Recycle the GEP we already have if possible. 
-      if (SrcGEPOperands.size() == 2) { -        GEP.setOperand(0, SrcGEPOperands[0]); +      // Update the GEP in place if possible. +      if (Src->getNumOperands() == 2) { +        GEP.setOperand(0, Src->getOperand(0));          GEP.setOperand(1, Sum);          return &GEP; -      } else { -        Indices.insert(Indices.end(), SrcGEPOperands.begin()+1, -                       SrcGEPOperands.end()-1); -        Indices.push_back(Sum); -        Indices.insert(Indices.end(), GEP.op_begin()+2, GEP.op_end());        } +      Indices.append(Src->op_begin()+1, Src->op_end()-1); +      Indices.push_back(Sum); +      Indices.append(GEP.op_begin()+2, GEP.op_end());      } else if (isa<Constant>(*GEP.idx_begin()) &&                 cast<Constant>(*GEP.idx_begin())->isNullValue() && -               SrcGEPOperands.size() != 1) { +               Src->getNumOperands() != 1) {        // Otherwise we can do the fold if the first index of the GEP is a zero -      Indices.insert(Indices.end(), SrcGEPOperands.begin()+1, -                     SrcGEPOperands.end()); -      Indices.insert(Indices.end(), GEP.idx_begin()+1, GEP.idx_end()); +      Indices.append(Src->op_begin()+1, Src->op_end()); +      Indices.append(GEP.idx_begin()+1, GEP.idx_end());      }      if (!Indices.empty()) -      return GetElementPtrInst::Create(SrcGEPOperands[0], Indices.begin(), -                                       Indices.end(), GEP.getName()); - -  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(PtrOp)) { -    // GEP of global variable.  If all of the indices for this GEP are -    // constants, we can promote this to a constexpr instead of an instruction. - -    // Scan for nonconstants... -    SmallVector<Constant*, 8> Indices; -    User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); -    for (; I != E && isa<Constant>(*I); ++I) -      Indices.push_back(cast<Constant>(*I)); - -    if (I == E) {  // If they are all constants... 
-      Constant *CE = Context->getConstantExprGetElementPtr(GV, -                                                    &Indices[0],Indices.size()); - -      // Replace all uses of the GEP with the new constexpr... -      return ReplaceInstUsesWith(GEP, CE); -    } -  } else if (Value *X = getBitCastOperand(PtrOp)) {  // Is the operand a cast? -    if (!isa<PointerType>(X->getType())) { -      // Not interesting.  Source pointer must be a cast from pointer. -    } else if (HasZeroPointerIndex) { -      // transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... -      // into     : GEP [10 x i8]* X, i32 0, ... -      // -      // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... -      //           into     : GEP i8* X, ... -      //  -      // This occurs when the program declares an array extern like "int X[];" +      return (cast<GEPOperator>(&GEP)->isInBounds() && +              Src->isInBounds()) ? +        GetElementPtrInst::CreateInBounds(Src->getOperand(0), Indices.begin(), +                                          Indices.end(), GEP.getName()) : +        GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), +                                  Indices.end(), GEP.getName()); +  } +   +  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). +  if (Value *X = getBitCastOperand(PtrOp)) { +    assert(isa<PointerType>(X->getType()) && "Must be cast from pointer"); + +    // If the input bitcast is actually "bitcast(bitcast(x))", then we don't  +    // want to change the gep until the bitcasts are eliminated. +    if (getBitCastOperand(X)) { +      Worklist.AddValue(PtrOp); +      return 0; +    } +     +    // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... +    // into     : GEP [10 x i8]* X, i32 0, ... +    // +    // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... +    //           into     : GEP i8* X, ... 
+    //  +    // This occurs when the program declares an array extern like "int X[];" +    if (HasZeroPointerIndex) {        const PointerType *CPTy = cast<PointerType>(PtrOp->getType());        const PointerType *XTy = cast<PointerType>(X->getType());        if (const ArrayType *CATy = @@ -11194,10 +11047,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {          if (CATy->getElementType() == XTy->getElementType()) {            // -> GEP i8* X, ...            SmallVector<Value*, 8> Indices(GEP.idx_begin()+1, GEP.idx_end()); -          return GetElementPtrInst::Create(X, Indices.begin(), Indices.end(), -                                           GEP.getName()); -        } else if (const ArrayType *XATy = -                 dyn_cast<ArrayType>(XTy->getElementType())) { +          return cast<GEPOperator>(&GEP)->isInBounds() ? +            GetElementPtrInst::CreateInBounds(X, Indices.begin(), Indices.end(), +                                              GEP.getName()) : +            GetElementPtrInst::Create(X, Indices.begin(), Indices.end(), +                                      GEP.getName()); +        } +         +        if (const ArrayType *XATy = dyn_cast<ArrayType>(XTy->getElementType())){            // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?            if (CATy->getElementType() == XATy->getElementType()) {              // -> GEP [10 x i8]* X, i32 0, ... 
@@ -11216,16 +11073,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast        const Type *SrcElTy = cast<PointerType>(X->getType())->getElementType();        const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType(); -      if (isa<ArrayType>(SrcElTy) && +      if (TD && isa<ArrayType>(SrcElTy) &&            TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==            TD->getTypeAllocSize(ResElTy)) {          Value *Idx[2]; -        Idx[0] = Context->getNullValue(Type::Int32Ty); +        Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));          Idx[1] = GEP.getOperand(1); -        Value *V = InsertNewInstBefore( -               GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()), GEP); +        Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ? +          Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : +          Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());          // V and GEP are both pointer types --> BitCast -        return new BitCastInst(V, GEP.getType()); +        return new BitCastInst(NewGEP, GEP.getType());        }        // Transform things like: @@ -11233,7 +11091,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        //   (where tmp = 8*tmp2) into:        // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast -      if (isa<ArrayType>(SrcElTy) && ResElTy == Type::Int8Ty) { +      if (TD && isa<ArrayType>(SrcElTy) && ResElTy == Type::getInt8Ty(*Context)) {          uint64_t ArrayEltSize =              TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()); @@ -11243,17 +11101,16 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {          ConstantInt *Scale = 0;          if (ArrayEltSize == 1) {            NewIdx = GEP.getOperand(1); -          Scale =  -               
Context->getConstantInt(cast<IntegerType>(NewIdx->getType()), 1); +          Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);          } else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) { -          NewIdx = Context->getConstantInt(CI->getType(), 1); +          NewIdx = ConstantInt::get(CI->getType(), 1);            Scale = CI;          } else if (Instruction *Inst =dyn_cast<Instruction>(GEP.getOperand(1))){            if (Inst->getOpcode() == Instruction::Shl &&                isa<ConstantInt>(Inst->getOperand(1))) {              ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));              uint32_t ShAmtVal = ShAmt->getLimitedValue(64); -            Scale = Context->getConstantInt(cast<IntegerType>(Inst->getType()), +            Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),                                       1ULL << ShAmtVal);              NewIdx = Inst->getOperand(0);            } else if (Inst->getOpcode() == Instruction::Mul && @@ -11269,23 +11126,21 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {          // operation after making sure Scale doesn't have the sign bit set.          
if (ArrayEltSize && Scale && Scale->getSExtValue() >= 0LL &&              Scale->getZExtValue() % ArrayEltSize == 0) { -          Scale = Context->getConstantInt(Scale->getType(), +          Scale = ConstantInt::get(Scale->getType(),                                     Scale->getZExtValue() / ArrayEltSize);            if (Scale->getZExtValue() != 1) { -            Constant *C = -                   Context->getConstantExprIntegerCast(Scale, NewIdx->getType(), +            Constant *C = ConstantExpr::getIntegerCast(Scale, NewIdx->getType(),                                                         false /*ZExt*/); -            Instruction *Sc = BinaryOperator::CreateMul(NewIdx, C, "idxscale"); -            NewIdx = InsertNewInstBefore(Sc, GEP); +            NewIdx = Builder->CreateMul(NewIdx, C, "idxscale");            }            // Insert the new GEP instruction.            Value *Idx[2]; -          Idx[0] = Context->getNullValue(Type::Int32Ty); +          Idx[0] = Constant::getNullValue(Type::getInt32Ty(*Context));            Idx[1] = NewIdx; -          Instruction *NewGEP = -            GetElementPtrInst::Create(X, Idx, Idx + 2, GEP.getName()); -          NewGEP = InsertNewInstBefore(NewGEP, GEP); +          Value *NewGEP = cast<GEPOperator>(&GEP)->isInBounds() ? +            Builder->CreateInBoundsGEP(X, Idx, Idx + 2, GEP.getName()) : +            Builder->CreateGEP(X, Idx, Idx + 2, GEP.getName());            // The NewGEP must be pointer typed, so must the old one -> BitCast            return new BitCastInst(NewGEP, GEP.getType());          } @@ -11294,12 +11149,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {    }    /// See if we can simplify: -  ///   X = bitcast A to B* +  ///   X = bitcast A* to B*    ///   Y = gep X, <...constant indices...>    /// into a gep of the original struct.  This is important for SROA and alias    /// analysis of unions.  If "A" is also a bitcast, wait for A/X to be merged.    
if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) { -    if (!isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { +    if (TD && +        !isa<BitCastInst>(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) {        // Determine how much the GEP moves the pointer.  We are guaranteed to get        // a constant back from EmitGEPOffset.        ConstantInt *OffsetV = @@ -11311,7 +11167,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        if (Offset == 0) {          // If the bitcast is of an allocation, and the allocation will be          // converted to match the type of the cast, don't touch this. -        if (isa<AllocationInst>(BCI->getOperand(0))) { +        if (isa<AllocationInst>(BCI->getOperand(0)) || +            isMalloc(BCI->getOperand(0))) {            // See if the bitcast simplifies, if so, don't nuke this GEP yet.            if (Instruction *I = visitBitCast(*BCI)) {              if (I != BCI) { @@ -11332,11 +11189,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        const Type *InTy =          cast<PointerType>(BCI->getOperand(0)->getType())->getElementType();        if (FindElementAtOffset(InTy, Offset, NewIndices, TD, Context)) { -        Instruction *NGEP = -           GetElementPtrInst::Create(BCI->getOperand(0), NewIndices.begin(), -                                     NewIndices.end()); -        if (NGEP->getType() == GEP.getType()) return NGEP; -        InsertNewInstBefore(NGEP, GEP); +        Value *NGEP = cast<GEPOperator>(&GEP)->isInBounds() ? 
+          Builder->CreateInBoundsGEP(BCI->getOperand(0), NewIndices.begin(), +                                     NewIndices.end()) : +          Builder->CreateGEP(BCI->getOperand(0), NewIndices.begin(), +                             NewIndices.end()); +         +        if (NGEP->getType() == GEP.getType()) +          return ReplaceInstUsesWith(GEP, NGEP);          NGEP->takeName(&GEP);          return new BitCastInst(NGEP, GEP.getType());        } @@ -11351,18 +11211,17 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {    if (AI.isArrayAllocation()) {  // Check C != 1      if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {        const Type *NewTy =  -        Context->getArrayType(AI.getAllocatedType(), C->getZExtValue()); +        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());        AllocationInst *New = 0;        // Create and insert the replacement instruction...        if (isa<MallocInst>(AI)) -        New = new MallocInst(NewTy, 0, AI.getAlignment(), AI.getName()); +        New = Builder->CreateMalloc(NewTy, 0, AI.getName());        else {          assert(isa<AllocaInst>(AI) && "Unknown type of allocation inst!"); -        New = new AllocaInst(NewTy, 0, AI.getAlignment(), AI.getName()); +        New = Builder->CreateAlloca(NewTy, 0, AI.getName());        } - -      InsertNewInstBefore(New, AI); +      New->setAlignment(AI.getAlignment());        // Scan to the end of the allocation instructions, to skip over a block of        // allocas if possible...also skip interleaved debug info @@ -11373,27 +11232,27 @@ Instruction *InstCombiner::visitAllocationInst(AllocationInst &AI) {        // Now that I is pointing to the first non-allocation-inst in the block,        // insert our getelementptr instruction...        
// -      Value *NullIdx = Context->getNullValue(Type::Int32Ty); +      Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(*Context));        Value *Idx[2];        Idx[0] = NullIdx;        Idx[1] = NullIdx; -      Value *V = GetElementPtrInst::Create(New, Idx, Idx + 2, -                                           New->getName()+".sub", It); +      Value *V = GetElementPtrInst::CreateInBounds(New, Idx, Idx + 2, +                                                   New->getName()+".sub", It);        // Now make everything use the getelementptr instead of the original        // allocation.        return ReplaceInstUsesWith(AI, V);      } else if (isa<UndefValue>(AI.getArraySize())) { -      return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType())); +      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));      }    } -  if (isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) { +  if (TD && isa<AllocaInst>(AI) && AI.getAllocatedType()->isSized()) {      // If alloca'ing a zero byte object, replace the alloca with a null pointer.      // Note that we only do this for alloca's, because malloc should allocate      // and return a unique pointer, even for a zero byte allocation.      if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) -      return ReplaceInstUsesWith(AI, Context->getNullValue(AI.getType())); +      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));      // If the alignment is 0 (unspecified), assign it the preferred alignment.      if (AI.getAlignment() == 0) @@ -11409,8 +11268,8 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {    // free undef -> unreachable.    if (isa<UndefValue>(Op)) {      // Insert a new store to null because we cannot modify the CFG here. 
-    new StoreInst(Context->getConstantIntTrue(), -           Context->getUndef(Context->getPointerTypeUnqual(Type::Int1Ty)), &FI); +    new StoreInst(ConstantInt::getTrue(*Context), +           UndefValue::get(Type::getInt1PtrTy(*Context)), &FI);      return EraseInstFromFunction(FI);    } @@ -11428,7 +11287,7 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {    // Change free (gep X, 0,0,0,0) into free(X)    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {      if (GEPI->hasAllZeroIndices()) { -      AddToWorkList(GEPI); +      Worklist.Add(GEPI);        FI.setOperand(0, GEPI->getOperand(0));        return &FI;      } @@ -11440,6 +11299,21 @@ Instruction *InstCombiner::visitFreeInst(FreeInst &FI) {        EraseInstFromFunction(FI);        return EraseInstFromFunction(*MI);      } +  if (isMalloc(Op)) { +    if (CallInst* CI = extractMallocCallFromBitCast(Op)) { +      if (Op->hasOneUse() && CI->hasOneUse()) { +        EraseInstFromFunction(FI); +        EraseInstFromFunction(*CI); +        return EraseInstFromFunction(*cast<Instruction>(Op)); +      } +    } else { +      // Op is a call to malloc +      if (Op->hasOneUse()) { +        EraseInstFromFunction(FI); +        return EraseInstFromFunction(*cast<Instruction>(Op)); +      } +    } +  }    return 0;  } @@ -11450,7 +11324,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,                                          const TargetData *TD) {    User *CI = cast<User>(LI.getOperand(0));    Value *CastOp = CI->getOperand(0); -  LLVMContext* Context = IC.getContext(); +  LLVMContext *Context = IC.getContext();    if (TD) {      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(CI)) { @@ -11479,7 +11353,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,              SingleChar = 0;              StrVal = (StrVal << 8) | SingleChar;            } -          Value *NL = Context->getConstantInt(StrVal); +          Value *NL = 
ConstantInt::get(*Context, StrVal);            return IC.ReplaceInstUsesWith(LI, NL);          }        } @@ -11505,26 +11379,26 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,          if (Constant *CSrc = dyn_cast<Constant>(CastOp))            if (ASrcTy->getNumElements() != 0) {              Value *Idxs[2]; -            Idxs[0] = Idxs[1] = Context->getNullValue(Type::Int32Ty); -            CastOp = Context->getConstantExprGetElementPtr(CSrc, Idxs, 2); +            Idxs[0] = Idxs[1] = Constant::getNullValue(Type::getInt32Ty(*Context)); +            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs, 2);              SrcTy = cast<PointerType>(CastOp->getType());              SrcPTy = SrcTy->getElementType();            } -      if ((SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||  +      if (IC.getTargetData() && +          (SrcPTy->isInteger() || isa<PointerType>(SrcPTy) ||               isa<VectorType>(SrcPTy)) &&            // Do not allow turning this into a load of an integer, which is then            // casted to a pointer, this pessimizes pointer analysis a lot.            (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) && -          IC.getTargetData().getTypeSizeInBits(SrcPTy) == -               IC.getTargetData().getTypeSizeInBits(DestPTy)) { +          IC.getTargetData()->getTypeSizeInBits(SrcPTy) == +               IC.getTargetData()->getTypeSizeInBits(DestPTy)) {          // Okay, we are casting from one integer or pointer type to another of          // the same size.  Instead of casting the pointer before the load, cast          // the result of the loaded value. 
-        Value *NewLoad = IC.InsertNewInstBefore(new LoadInst(CastOp, -                                                             CI->getName(), -                                                         LI.isVolatile()),LI); +        Value *NewLoad =  +          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());          // Now cast the result of the load.          return new BitCastInst(NewLoad, LI.getType());        } @@ -11537,14 +11411,16 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {    Value *Op = LI.getOperand(0);    // Attempt to improve the alignment. -  unsigned KnownAlign = -    GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); -  if (KnownAlign > -      (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : -                                LI.getAlignment())) -    LI.setAlignment(KnownAlign); - -  // load (cast X) --> cast (load X) iff safe +  if (TD) { +    unsigned KnownAlign = +      GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType())); +    if (KnownAlign > +        (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) : +                                  LI.getAlignment())) +      LI.setAlignment(KnownAlign); +  } + +  // load (cast X) --> cast (load X) iff safe.    if (isa<CastInst>(Op))      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))        return Res; @@ -11562,29 +11438,28 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {      const Value *GEPI0 = GEPI->getOperand(0);      // TODO: Consider a target hook for valid address spaces for this xform. -    if (isa<ConstantPointerNull>(GEPI0) && -        cast<PointerType>(GEPI0->getType())->getAddressSpace() == 0) { +    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){        // Insert a new store to null instruction before the load to indicate        // that this code is not reachable.  
We do this instead of inserting        // an unreachable instruction directly because we cannot modify the        // CFG. -      new StoreInst(Context->getUndef(LI.getType()), -                    Context->getNullValue(Op->getType()), &LI); -      return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); +      new StoreInst(UndefValue::get(LI.getType()), +                    Constant::getNullValue(Op->getType()), &LI); +      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));      }    }     if (Constant *C = dyn_cast<Constant>(Op)) {      // load null/undef -> undef      // TODO: Consider a target hook for valid address spaces for this xform. -    if (isa<UndefValue>(C) || (C->isNullValue() &&  -        cast<PointerType>(Op->getType())->getAddressSpace() == 0)) { +    if (isa<UndefValue>(C) || +        (C->isNullValue() && LI.getPointerAddressSpace() == 0)) {        // Insert a new store to null instruction before the load to indicate that        // this code is not reachable.  We do this instead of inserting an        // unreachable instruction directly because we cannot modify the CFG. -      new StoreInst(Context->getUndef(LI.getType()), -                    Context->getNullValue(Op->getType()), &LI); -      return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); +      new StoreInst(UndefValue::get(LI.getType()), +                    Constant::getNullValue(Op->getType()), &LI); +      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));      }      // Instcombine load (constant global) into the value loaded. @@ -11605,9 +11480,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {            // that this code is not reachable.  We do this instead of inserting            // an unreachable instruction directly because we cannot modify the            // CFG. 
-          new StoreInst(Context->getUndef(LI.getType()), -                        Context->getNullValue(Op->getType()), &LI); -          return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); +          new StoreInst(UndefValue::get(LI.getType()), +                        Constant::getNullValue(Op->getType()), &LI); +          return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));          }        } else if (CE->isCast()) { @@ -11622,9 +11497,9 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op->getUnderlyingObject())){      if (GV->isConstant() && GV->hasDefinitiveInitializer()) {        if (GV->getInitializer()->isNullValue()) -        return ReplaceInstUsesWith(LI, Context->getNullValue(LI.getType())); +        return ReplaceInstUsesWith(LI, Constant::getNullValue(LI.getType()));        else if (isa<UndefValue>(GV->getInitializer())) -        return ReplaceInstUsesWith(LI, Context->getUndef(LI.getType())); +        return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));      }    } @@ -11643,10 +11518,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {        // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).        
if (isSafeToLoadUnconditionally(SI->getOperand(1), SI) &&            isSafeToLoadUnconditionally(SI->getOperand(2), SI)) { -        Value *V1 = InsertNewInstBefore(new LoadInst(SI->getOperand(1), -                                     SI->getOperand(1)->getName()+".val"), LI); -        Value *V2 = InsertNewInstBefore(new LoadInst(SI->getOperand(2), -                                     SI->getOperand(2)->getName()+".val"), LI); +        Value *V1 = Builder->CreateLoad(SI->getOperand(1), +                                        SI->getOperand(1)->getName()+".val"); +        Value *V2 = Builder->CreateLoad(SI->getOperand(2), +                                        SI->getOperand(2)->getName()+".val");          return SelectInst::Create(SI->getCondition(), V1, V2);        } @@ -11674,7 +11549,6 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {  static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {    User *CI = cast<User>(SI.getOperand(1));    Value *CastOp = CI->getOperand(0); -  LLVMContext* Context = IC.getContext();    const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();    const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); @@ -11696,7 +11570,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {    // constants.    if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {      // Index through pointer. 
-    Constant *Zero = Context->getNullValue(Type::Int32Ty); +    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(*IC.getContext()));      NewGEPIndices.push_back(Zero);      while (1) { @@ -11713,7 +11587,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {        }      } -    SrcTy = Context->getPointerType(SrcPTy, SrcTy->getAddressSpace()); +    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());    }    if (!SrcPTy->isInteger() && !isa<PointerType>(SrcPTy)) @@ -11721,10 +11595,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {    // If the pointers point into different address spaces or if they point to    // values with different sizes, we can't do the transformation. -  if (SrcTy->getAddressSpace() !=  +  if (!IC.getTargetData() || +      SrcTy->getAddressSpace() !=           cast<PointerType>(CI->getType())->getAddressSpace() || -      IC.getTargetData().getTypeSizeInBits(SrcPTy) != -      IC.getTargetData().getTypeSizeInBits(DestPTy)) +      IC.getTargetData()->getTypeSizeInBits(SrcPTy) != +      IC.getTargetData()->getTypeSizeInBits(DestPTy))      return 0;    // Okay, we are casting from one integer or pointer type to another of @@ -11745,22 +11620,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {    // SIOp0 is a pointer to aggregate and this is a store to the first field,    // emit a GEP to index into its first field. 
-  if (!NewGEPIndices.empty()) { -    if (Constant *C = dyn_cast<Constant>(CastOp)) -      CastOp = Context->getConstantExprGetElementPtr(C, &NewGEPIndices[0],  -                                              NewGEPIndices.size()); -    else -      CastOp = IC.InsertNewInstBefore( -              GetElementPtrInst::Create(CastOp, NewGEPIndices.begin(), -                                        NewGEPIndices.end()), SI); -  } +  if (!NewGEPIndices.empty()) +    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(), +                                           NewGEPIndices.end()); -  if (Constant *C = dyn_cast<Constant>(SIOp0)) -    NewCast = Context->getConstantExprCast(opcode, C, CastDstTy); -  else -    NewCast = IC.InsertNewInstBefore( -      CastInst::Create(opcode, SIOp0, CastDstTy, SIOp0->getName()+".c"),  -      SI); +  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, +                                   SIOp0->getName()+".c");    return new StoreInst(NewCast, CastOp);  } @@ -11777,12 +11642,16 @@ static bool equivalentAddressValues(Value *A, Value *B) {    if (A == B) return true;    // Test if the values come form identical arithmetic instructions. +  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because +  // its only used to compare two uses within the same basic block, which +  // means that they'll always either have the same value or one of them +  // will have an undefined value.    if (isa<BinaryOperator>(A) ||        isa<CastInst>(A) ||        isa<PHINode>(A) ||        isa<GetElementPtrInst>(A))      if (Instruction *BI = dyn_cast<Instruction>(B)) -      if (cast<Instruction>(A)->isIdenticalTo(BI)) +      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))          return true;    // Otherwise they may not be equivalent. @@ -11854,12 +11723,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {    }    // Attempt to improve the alignment. 
-  unsigned KnownAlign = -    GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); -  if (KnownAlign > -      (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : -                                SI.getAlignment())) -    SI.setAlignment(KnownAlign); +  if (TD) { +    unsigned KnownAlign = +      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); +    if (KnownAlign > +        (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) : +                                  SI.getAlignment())) +      SI.setAlignment(KnownAlign); +  }    // Do really simple DSE, to catch cases where there are several consecutive    // stores to the same location, separated by a few arithmetic operations. This @@ -11914,12 +11785,11 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {    if (SI.isVolatile()) return 0;  // Don't hack volatile stores.    // store X, null    -> turns into 'unreachable' in SimplifyCFG -  if (isa<ConstantPointerNull>(Ptr) && -      cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) { +  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {      if (!isa<UndefValue>(Val)) { -      SI.setOperand(0, Context->getUndef(Val->getType())); +      SI.setOperand(0, UndefValue::get(Val->getType()));        if (Instruction *U = dyn_cast<Instruction>(Val)) -        AddToWorkList(U);  // Dropped a use. +        Worklist.Add(U);  // Dropped a use.        ++NumCombined;      }      return 0;  // Do not modify these! 
@@ -12096,41 +11966,34 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {    // Cannonicalize fcmp_one -> fcmp_oeq    FCmpInst::Predicate FPred; Value *Y;    if (match(&BI, m_Br(m_FCmp(FPred, m_Value(X), m_Value(Y)),  -                             TrueDest, FalseDest))) -    if ((FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || -         FPred == FCmpInst::FCMP_OGE) && BI.getCondition()->hasOneUse()) { -      FCmpInst *I = cast<FCmpInst>(BI.getCondition()); -      FCmpInst::Predicate NewPred = FCmpInst::getInversePredicate(FPred); -      Instruction *NewSCC = new FCmpInst(NewPred, X, Y, "", I); -      NewSCC->takeName(I); -      // Swap Destinations and condition... -      BI.setCondition(NewSCC); +                             TrueDest, FalseDest)) && +      BI.getCondition()->hasOneUse()) +    if (FPred == FCmpInst::FCMP_ONE || FPred == FCmpInst::FCMP_OLE || +        FPred == FCmpInst::FCMP_OGE) { +      FCmpInst *Cond = cast<FCmpInst>(BI.getCondition()); +      Cond->setPredicate(FCmpInst::getInversePredicate(FPred)); +       +      // Swap Destinations and condition.        BI.setSuccessor(0, FalseDest);        BI.setSuccessor(1, TrueDest); -      RemoveFromWorkList(I); -      I->eraseFromParent(); -      AddToWorkList(NewSCC); +      Worklist.Add(Cond);        return &BI;      }    // Cannonicalize icmp_ne -> icmp_eq    ICmpInst::Predicate IPred;    if (match(&BI, m_Br(m_ICmp(IPred, m_Value(X), m_Value(Y)), -                      TrueDest, FalseDest))) -    if ((IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE || -         IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || -         IPred == ICmpInst::ICMP_SGE) && BI.getCondition()->hasOneUse()) { -      ICmpInst *I = cast<ICmpInst>(BI.getCondition()); -      ICmpInst::Predicate NewPred = ICmpInst::getInversePredicate(IPred); -      Instruction *NewSCC = new ICmpInst(NewPred, X, Y, "", I); -      NewSCC->takeName(I); -      // Swap Destinations and condition... 
-      BI.setCondition(NewSCC); +                      TrueDest, FalseDest)) && +      BI.getCondition()->hasOneUse()) +    if (IPred == ICmpInst::ICMP_NE  || IPred == ICmpInst::ICMP_ULE || +        IPred == ICmpInst::ICMP_SLE || IPred == ICmpInst::ICMP_UGE || +        IPred == ICmpInst::ICMP_SGE) { +      ICmpInst *Cond = cast<ICmpInst>(BI.getCondition()); +      Cond->setPredicate(ICmpInst::getInversePredicate(IPred)); +      // Swap Destinations and condition.        BI.setSuccessor(0, FalseDest);        BI.setSuccessor(1, TrueDest); -      RemoveFromWorkList(I); -      I->eraseFromParent();; -      AddToWorkList(NewSCC); +      Worklist.Add(Cond);        return &BI;      } @@ -12145,10 +12008,10 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {          // change 'switch (X+4) case 1:' into 'switch (X) case -3'          for (unsigned i = 2, e = SI.getNumOperands(); i != e; i += 2)            SI.setOperand(i, -                   Context->getConstantExprSub(cast<Constant>(SI.getOperand(i)), +                   ConstantExpr::getSub(cast<Constant>(SI.getOperand(i)),                                                  AddRHS));          SI.setOperand(0, I->getOperand(0)); -        AddToWorkList(I); +        Worklist.Add(I);          return &SI;        }    } @@ -12163,10 +12026,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {    if (Constant *C = dyn_cast<Constant>(Agg)) {      if (isa<UndefValue>(C)) -      return ReplaceInstUsesWith(EV, Context->getUndef(EV.getType())); +      return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));      if (isa<ConstantAggregateZero>(C)) -      return ReplaceInstUsesWith(EV, Context->getNullValue(EV.getType())); +      return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));      if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {        // Extract the element indexed by the first index out of the constant @@ -12214,10 +12077,8 @@ Instruction 
*InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {        // %E = insertvalue { i32 } %X, i32 42, 0        // by switching the order of the insert and extract (though the        // insertvalue should be left in, since it may have other uses). -      Value *NewEV = InsertNewInstBefore( -        ExtractValueInst::Create(IV->getAggregateOperand(), -                                 EV.idx_begin(), EV.idx_end()), -        EV); +      Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), +                                                 EV.idx_begin(), EV.idx_end());        return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),                                       insi, inse);      } @@ -12303,17 +12164,17 @@ static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {  /// value is already around as a register, for example if it were inserted then  /// extracted from the vector.  static Value *FindScalarElement(Value *V, unsigned EltNo, -                                LLVMContext* Context) { +                                LLVMContext *Context) {    assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");    const VectorType *PTy = cast<VectorType>(V->getType());    unsigned Width = PTy->getNumElements();    if (EltNo >= Width)  // Out of range access. 
-    return Context->getUndef(PTy->getElementType()); +    return UndefValue::get(PTy->getElementType());    if (isa<UndefValue>(V)) -    return Context->getUndef(PTy->getElementType()); +    return UndefValue::get(PTy->getElementType());    else if (isa<ConstantAggregateZero>(V)) -    return Context->getNullValue(PTy->getElementType()); +    return Constant::getNullValue(PTy->getElementType());    else if (ConstantVector *CP = dyn_cast<ConstantVector>(V))      return CP->getOperand(EltNo);    else if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { @@ -12339,7 +12200,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo,      else if (InEl < LHSWidth*2)        return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth, Context);      else -      return Context->getUndef(PTy->getElementType()); +      return UndefValue::get(PTy->getElementType());    }    // Otherwise, we don't know. @@ -12349,18 +12210,18 @@ static Value *FindScalarElement(Value *V, unsigned EltNo,  Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {    // If vector val is undef, replace extract with scalar undef.    if (isa<UndefValue>(EI.getOperand(0))) -    return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); +    return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));    // If vector val is constant 0, replace extract with scalar 0.    if (isa<ConstantAggregateZero>(EI.getOperand(0))) -    return ReplaceInstUsesWith(EI, Context->getNullValue(EI.getType())); +    return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));    if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {      // If vector val is constant with all elements the same, replace EI with      // that element. When the elements are not identical, we cannot replace yet      // (we do that below, but only when the index is constant).      
Constant *op0 = C->getOperand(0); -    for (unsigned i = 1; i < C->getNumOperands(); ++i) +    for (unsigned i = 1; i != C->getNumOperands(); ++i)        if (C->getOperand(i) != op0) {          op0 = 0;           break; @@ -12373,13 +12234,12 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {    // find a previously computed scalar that was inserted into the vector.    if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {      unsigned IndexVal = IdxC->getZExtValue(); -    unsigned VectorWidth =  -      cast<VectorType>(EI.getOperand(0)->getType())->getNumElements(); +    unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();      // If this is extracting an invalid index, turn this into undef, to avoid      // crashing the code below.      if (IndexVal >= VectorWidth) -      return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); +      return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));      // This instruction only demands the single element from the input vector.      
// If the input vector has a single use, simplify it based on this use @@ -12411,42 +12271,27 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {    }    if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { -    if (I->hasOneUse()) { -      // Push extractelement into predecessor operation if legal and -      // profitable to do so -      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { -        bool isConstantElt = isa<ConstantInt>(EI.getOperand(1)); -        if (CheapToScalarize(BO, isConstantElt)) { -          ExtractElementInst *newEI0 =  -            new ExtractElementInst(BO->getOperand(0), EI.getOperand(1), -                                   EI.getName()+".lhs"); -          ExtractElementInst *newEI1 = -            new ExtractElementInst(BO->getOperand(1), EI.getOperand(1), -                                   EI.getName()+".rhs"); -          InsertNewInstBefore(newEI0, EI); -          InsertNewInstBefore(newEI1, EI); -          return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); -        } -      } else if (isa<LoadInst>(I)) { -        unsigned AS =  -          cast<PointerType>(I->getOperand(0)->getType())->getAddressSpace(); -        Value *Ptr = InsertBitCastBefore(I->getOperand(0), -                                  Context->getPointerType(EI.getType(), AS),EI); -        GetElementPtrInst *GEP = -          GetElementPtrInst::Create(Ptr, EI.getOperand(1), I->getName()+".gep"); -        InsertNewInstBefore(GEP, EI); -        return new LoadInst(GEP); -      } -    } -    if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { +    // Push extractelement into predecessor operation if legal and +    // profitable to do so +    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { +      if (I->hasOneUse() && +          CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { +        Value *newEI0 = +          Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), +             
                           EI.getName()+".lhs"); +        Value *newEI1 = +          Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), +                                        EI.getName()+".rhs"); +        return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); +      } +    } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {        // Extracting the inserted element?        if (IE->getOperand(2) == EI.getOperand(1))          return ReplaceInstUsesWith(EI, IE->getOperand(1));        // If the inserted and extracted elements are constants, they must not        // be the same value, extract from the pre-inserted value instead. -      if (isa<Constant>(IE->getOperand(2)) && -          isa<Constant>(EI.getOperand(1))) { -        AddUsesToWorkList(EI); +      if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { +        Worklist.AddValue(EI.getOperand(0));          EI.setOperand(0, IE->getOperand(0));          return &EI;        } @@ -12465,11 +12310,14 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {            SrcIdx -= LHSWidth;            Src = SVI->getOperand(1);          } else { -          return ReplaceInstUsesWith(EI, Context->getUndef(EI.getType())); +          return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));          } -        return new ExtractElementInst(Src, SrcIdx); +        return ExtractElementInst::Create(Src, +                         ConstantInt::get(Type::getInt32Ty(*Context), SrcIdx, +                                          false));        }      } +    // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement)    }    return 0;  } @@ -12479,21 +12327,21 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {  /// Otherwise, return false.  
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,                                           std::vector<Constant*> &Mask, -                                         LLVMContext* Context) { +                                         LLVMContext *Context) {    assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&           "Invalid CollectSingleShuffleElements");    unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();    if (isa<UndefValue>(V)) { -    Mask.assign(NumElts, Context->getUndef(Type::Int32Ty)); +    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));      return true;    } else if (V == LHS) {      for (unsigned i = 0; i != NumElts; ++i) -      Mask.push_back(Context->getConstantInt(Type::Int32Ty, i)); +      Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));      return true;    } else if (V == RHS) {      for (unsigned i = 0; i != NumElts; ++i) -      Mask.push_back(Context->getConstantInt(Type::Int32Ty, i+NumElts)); +      Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i+NumElts));      return true;    } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {      // If this is an insert of an extract from some other vector, include it. @@ -12510,7 +12358,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,        // transitively ok.        if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask, Context)) {          // If so, update the mask to reflect the inserted undef. -        Mask[InsertedIdx] = Context->getUndef(Type::Int32Ty); +        Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(*Context));          return true;        }            } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ @@ -12527,11 +12375,11 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,              // If so, update the mask to reflect the inserted value.              
if (EI->getOperand(0) == LHS) {                Mask[InsertedIdx % NumElts] =  -                 Context->getConstantInt(Type::Int32Ty, ExtractedIdx); +                 ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx);              } else {                assert(EI->getOperand(0) == RHS);                Mask[InsertedIdx % NumElts] =  -                Context->getConstantInt(Type::Int32Ty, ExtractedIdx+NumElts); +                ConstantInt::get(Type::getInt32Ty(*Context), ExtractedIdx+NumElts);              }              return true; @@ -12549,17 +12397,17 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,  /// RHS of the shuffle instruction, if it is not null.  Return a shuffle mask  /// that computes V and the LHS value of the shuffle.  static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask, -                                     Value *&RHS, LLVMContext* Context) { +                                     Value *&RHS, LLVMContext *Context) {    assert(isa<VectorType>(V->getType()) &&            (RHS == 0 || V->getType() == RHS->getType()) &&           "Invalid shuffle!");    unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();    if (isa<UndefValue>(V)) { -    Mask.assign(NumElts, Context->getUndef(Type::Int32Ty)); +    Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(*Context)));      return V;    } else if (isa<ConstantAggregateZero>(V)) { -    Mask.assign(NumElts, Context->getConstantInt(Type::Int32Ty, 0)); +    Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(*Context), 0));      return V;    } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {      // If this is an insert of an extract from some other vector, include it. 
@@ -12580,7 +12428,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,            RHS = EI->getOperand(0);            Value *V = CollectShuffleElements(VecOp, Mask, RHS, Context);            Mask[InsertedIdx % NumElts] =  -            Context->getConstantInt(Type::Int32Ty, NumElts+ExtractedIdx); +            ConstantInt::get(Type::getInt32Ty(*Context), NumElts+ExtractedIdx);            return V;          } @@ -12590,7 +12438,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,            // Everything but the extracted element is replaced with the RHS.            for (unsigned i = 0; i != NumElts; ++i) {              if (i != InsertedIdx) -              Mask[i] = Context->getConstantInt(Type::Int32Ty, NumElts+i); +              Mask[i] = ConstantInt::get(Type::getInt32Ty(*Context), NumElts+i);            }            return V;          } @@ -12608,7 +12456,7 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,    // Otherwise, can't do anything fancy.  Return an identity vector.    for (unsigned i = 0; i != NumElts; ++i) -    Mask.push_back(Context->getConstantInt(Type::Int32Ty, i)); +    Mask.push_back(ConstantInt::get(Type::getInt32Ty(*Context), i));    return V;  } @@ -12635,45 +12483,23 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {          return ReplaceInstUsesWith(IE, VecOp);        if (InsertedIdx >= NumVectorElts)  // Out of range insert. -        return ReplaceInstUsesWith(IE, Context->getUndef(IE.getType())); +        return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));        // If we are extracting a value from a vector, then inserting it right        // back into the same place, just use the input vector.        if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)          return ReplaceInstUsesWith(IE, VecOp);       -      // We could theoretically do this for ANY input.  
However, doing so could -      // turn chains of insertelement instructions into a chain of shufflevector -      // instructions, and right now we do not merge shufflevectors.  As such, -      // only do this in a situation where it is clear that there is benefit. -      if (isa<UndefValue>(VecOp) || isa<ConstantAggregateZero>(VecOp)) { -        // Turn this into shuffle(EIOp0, VecOp, Mask).  The result has all of -        // the values of VecOp, except then one read from EIOp0. -        // Build a new shuffle mask. -        std::vector<Constant*> Mask; -        if (isa<UndefValue>(VecOp)) -          Mask.assign(NumVectorElts, Context->getUndef(Type::Int32Ty)); -        else { -          assert(isa<ConstantAggregateZero>(VecOp) && "Unknown thing"); -          Mask.assign(NumVectorElts, Context->getConstantInt(Type::Int32Ty, -                                                       NumVectorElts)); -        }  -        Mask[InsertedIdx] =  -                           Context->getConstantInt(Type::Int32Ty, ExtractedIdx); -        return new ShuffleVectorInst(EI->getOperand(0), VecOp, -                                     Context->getConstantVector(Mask)); -      } -              // If this insertelement isn't used by some other insertelement, turn it        // (and any insertelements it points to), into one big shuffle.        if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {          std::vector<Constant*> Mask;          Value *RHS = 0;          Value *LHS = CollectShuffleElements(&IE, Mask, RHS, Context); -        if (RHS == 0) RHS = Context->getUndef(LHS->getType()); +        if (RHS == 0) RHS = UndefValue::get(LHS->getType());          // We now have a shuffle of LHS, RHS, Mask.          
return new ShuffleVectorInst(LHS, RHS, -                                     Context->getConstantVector(Mask)); +                                     ConstantVector::get(Mask));        }      }    } @@ -12697,7 +12523,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {    // Undefined shuffle mask -> undefined value.    if (isa<UndefValue>(SVI.getOperand(2))) -    return ReplaceInstUsesWith(SVI, Context->getUndef(SVI.getType())); +    return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));    unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); @@ -12724,21 +12550,21 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {      std::vector<Constant*> Elts;      for (unsigned i = 0, e = Mask.size(); i != e; ++i) {        if (Mask[i] >= 2*e) -        Elts.push_back(Context->getUndef(Type::Int32Ty)); +        Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));        else {          if ((Mask[i] >= e && isa<UndefValue>(RHS)) ||              (Mask[i] <  e && isa<UndefValue>(LHS))) {            Mask[i] = 2*e;     // Turn into undef. -          Elts.push_back(Context->getUndef(Type::Int32Ty)); +          Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));          } else {            Mask[i] = Mask[i] % e;  // Force to LHS. 
-          Elts.push_back(Context->getConstantInt(Type::Int32Ty, Mask[i])); +          Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), Mask[i]));          }        }      }      SVI.setOperand(0, SVI.getOperand(1)); -    SVI.setOperand(1, Context->getUndef(RHS->getType())); -    SVI.setOperand(2, Context->getConstantVector(Elts)); +    SVI.setOperand(1, UndefValue::get(RHS->getType())); +    SVI.setOperand(2, ConstantVector::get(Elts));      LHS = SVI.getOperand(0);      RHS = SVI.getOperand(1);      MadeChange = true; @@ -12788,14 +12614,14 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {          std::vector<Constant*> Elts;          for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {            if (NewMask[i] >= LHSInNElts*2) { -            Elts.push_back(Context->getUndef(Type::Int32Ty)); +            Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));            } else { -            Elts.push_back(Context->getConstantInt(Type::Int32Ty, NewMask[i])); +            Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i]));            }          }          return new ShuffleVectorInst(LHSSVI->getOperand(0),                                       LHSSVI->getOperand(1), -                                     Context->getConstantVector(Elts)); +                                     ConstantVector::get(Elts));        }      }    } @@ -12855,6 +12681,9 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,                                         const TargetData *TD) {    SmallVector<BasicBlock*, 256> Worklist;    Worklist.push_back(BB); +   +  std::vector<Instruction*> InstrsForInstCombineWorklist; +  InstrsForInstCombineWorklist.reserve(128);    while (!Worklist.empty()) {      BB = Worklist.back(); @@ -12863,44 +12692,28 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,      // We have now visited this block!  If we've already been here, ignore it.      
if (!Visited.insert(BB)) continue; -    DbgInfoIntrinsic *DBI_Prev = NULL;      for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {        Instruction *Inst = BBI++;        // DCE instruction if trivially dead.        if (isInstructionTriviallyDead(Inst)) {          ++NumDeadInst; -        DOUT << "IC: DCE: " << *Inst; +        DEBUG(errs() << "IC: DCE: " << *Inst << '\n');          Inst->eraseFromParent();          continue;        }        // ConstantProp instruction if trivially constant. -      if (Constant *C = ConstantFoldInstruction(Inst, TD)) { -        DOUT << "IC: ConstFold to: " << *C << " from: " << *Inst; +      if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) { +        DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " +                     << *Inst << '\n');          Inst->replaceAllUsesWith(C);          ++NumConstProp;          Inst->eraseFromParent();          continue;        } -      -      // If there are two consecutive llvm.dbg.stoppoint calls then -      // it is likely that the optimizer deleted code in between these -      // two intrinsics.  -      DbgInfoIntrinsic *DBI_Next = dyn_cast<DbgInfoIntrinsic>(Inst); -      if (DBI_Next) { -        if (DBI_Prev -            && DBI_Prev->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint -            && DBI_Next->getIntrinsicID() == llvm::Intrinsic::dbg_stoppoint) { -          IC.RemoveFromWorkList(DBI_Prev); -          DBI_Prev->eraseFromParent(); -        } -        DBI_Prev = DBI_Next; -      } else { -        DBI_Prev = 0; -      } -      IC.AddToWorkList(Inst); +      InstrsForInstCombineWorklist.push_back(Inst);      }      // Recursively visit successors.  
If this is a branch or switch on a @@ -12932,14 +12745,22 @@ static void AddReachableCodeToWorklist(BasicBlock *BB,      for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)        Worklist.push_back(TI->getSuccessor(i));    } +   +  // Once we've found all of the instructions to add to instcombine's worklist, +  // add them in reverse order.  This way instcombine will visit from the top +  // of the function down.  This jives well with the way that it adds all uses +  // of instructions to the worklist after doing a transformation, thus avoiding +  // some N^2 behavior in pathological cases. +  IC.Worklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], +                              InstrsForInstCombineWorklist.size());  }  bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { -  bool Changed = false; -  TD = &getAnalysis<TargetData>(); +  MadeIRChange = false; +  TD = getAnalysisIfAvailable<TargetData>(); -  DEBUG(DOUT << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " -             << F.getNameStr() << "\n"); +  DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " +        << F.getNameStr() << "\n");    {      // Do a depth-first traversal of the function, populate the worklist with @@ -12957,71 +12778,73 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {          while (Term != BB->begin()) {   // Remove instrs bottom-up            BasicBlock::iterator I = Term; --I; -          DOUT << "IC: DCE: " << *I; +          DEBUG(errs() << "IC: DCE: " << *I << '\n');            // A debug intrinsic shouldn't force another iteration if we weren't            // going to do one without it.            if (!isa<DbgInfoIntrinsic>(I)) {              ++NumDeadInst; -            Changed = true; +            MadeIRChange = true;            } -          if (!I->use_empty()) -            I->replaceAllUsesWith(Context->getUndef(I->getType())); + + +          // If I is not void type then replaceAllUsesWith undef. 
+          // This allows ValueHandlers and custom metadata to adjust itself. +          if (!I->getType()->isVoidTy()) +            I->replaceAllUsesWith(UndefValue::get(I->getType()));            I->eraseFromParent();          }        }    } -  while (!Worklist.empty()) { -    Instruction *I = RemoveOneFromWorkList(); +  while (!Worklist.isEmpty()) { +    Instruction *I = Worklist.RemoveOne();      if (I == 0) continue;  // skip null values.      // Check to see if we can DCE the instruction.      if (isInstructionTriviallyDead(I)) { -      // Add operands to the worklist. -      if (I->getNumOperands() < 4) -        AddUsesToWorkList(*I); +      DEBUG(errs() << "IC: DCE: " << *I << '\n'); +      EraseInstFromFunction(*I);        ++NumDeadInst; - -      DOUT << "IC: DCE: " << *I; - -      I->eraseFromParent(); -      RemoveFromWorkList(I); -      Changed = true; +      MadeIRChange = true;        continue;      }      // Instruction isn't dead, see if we can constant propagate it. -    if (Constant *C = ConstantFoldInstruction(I, TD)) { -      DOUT << "IC: ConstFold to: " << *C << " from: " << *I; +    if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) { +      DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');        // Add operands to the worklist. -      AddUsesToWorkList(*I);        ReplaceInstUsesWith(*I, C); -        ++NumConstProp; -      I->eraseFromParent(); -      RemoveFromWorkList(I); -      Changed = true; +      EraseInstFromFunction(*I); +      MadeIRChange = true;        continue;      } -    if (TD && -        (I->getType()->getTypeID() == Type::VoidTyID || -         I->isTrapping())) { +    if (TD) {        // See if we can constant fold its operands.        
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)          if (ConstantExpr *CE = dyn_cast<ConstantExpr>(i)) -          if (Constant *NewC = ConstantFoldConstantExpression(CE, TD)) +          if (Constant *NewC = ConstantFoldConstantExpression(CE,    +                                  F.getContext(), TD))              if (NewC != CE) { -              i->set(NewC); -              Changed = true; +              *i = NewC; +              MadeIRChange = true;              }      }      // See if we can trivially sink this instruction to a successor basic block.      if (I->hasOneUse()) {        BasicBlock *BB = I->getParent(); -      BasicBlock *UserParent = cast<Instruction>(I->use_back())->getParent(); +      Instruction *UserInst = cast<Instruction>(I->use_back()); +      BasicBlock *UserParent; +       +      // Get the block the use occurs in. +      if (PHINode *PN = dyn_cast<PHINode>(UserInst)) +        UserParent = PN->getIncomingBlock(I->use_begin().getUse()); +      else +        UserParent = UserInst->getParent(); +              if (UserParent != BB) {          bool UserIsSuccessor = false;          // See if the user is one of our successors. @@ -13034,31 +12857,34 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {          // If the user is one of our immediate successors, and if that successor          // only has us as a predecessors (we'd have to split the critical edge          // otherwise), we can keep going. -        if (UserIsSuccessor && !isa<PHINode>(I->use_back()) && -            next(pred_begin(UserParent)) == pred_end(UserParent)) +        if (UserIsSuccessor && UserParent->getSinglePredecessor())            // Okay, the CFG is simple enough, try to sink this instruction. -          Changed |= TryToSinkInstruction(I, UserParent); +          MadeIRChange |= TryToSinkInstruction(I, UserParent);        }      } -    // Now that we have an instruction, try combining it to simplify it... 
+    // Now that we have an instruction, try combining it to simplify it. +    Builder->SetInsertPoint(I->getParent(), I); +      #ifndef NDEBUG      std::string OrigI;  #endif -    DEBUG(std::ostringstream SS; I->print(SS); OrigI = SS.str();); +    DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str();); +    DEBUG(errs() << "IC: Visiting: " << OrigI << '\n'); +      if (Instruction *Result = visit(*I)) {        ++NumCombined;        // Should we replace the old instruction with a new one?        if (Result != I) { -        DOUT << "IC: Old = " << *I -             << "    New = " << *Result; +        DEBUG(errs() << "IC: Old = " << *I << '\n' +                     << "    New = " << *Result << '\n');          // Everything uses the new instruction now.          I->replaceAllUsesWith(Result);          // Push the new instruction and any users onto the worklist. -        AddToWorkList(Result); -        AddUsersToWorkList(*Result); +        Worklist.Add(Result); +        Worklist.AddUsersToWorkList(*Result);          // Move the name to the new instruction first.          Result->takeName(I); @@ -13073,52 +12899,42 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {          InstParent->getInstList().insert(InsertPos, Result); -        // Make sure that we reprocess all operands now that we reduced their -        // use counts. -        AddUsesToWorkList(*I); - -        // Instructions can end up on the worklist more than once.  Make sure -        // we do not process an instruction that has been deleted. -        RemoveFromWorkList(I); - -        // Erase the old instruction. 
-        InstParent->getInstList().erase(I); +        EraseInstFromFunction(*I);        } else {  #ifndef NDEBUG -        DOUT << "IC: Mod = " << OrigI -             << "    New = " << *I; +        DEBUG(errs() << "IC: Mod = " << OrigI << '\n' +                     << "    New = " << *I << '\n');  #endif          // If the instruction was modified, it's possible that it is now dead.          // if so, remove it.          if (isInstructionTriviallyDead(I)) { -          // Make sure we process all operands now that we are reducing their -          // use counts. -          AddUsesToWorkList(*I); - -          // Instructions may end up in the worklist more than once.  Erase all -          // occurrences of this instruction. -          RemoveFromWorkList(I); -          I->eraseFromParent(); +          EraseInstFromFunction(*I);          } else { -          AddToWorkList(I); -          AddUsersToWorkList(*I); +          Worklist.Add(I); +          Worklist.AddUsersToWorkList(*I);          }        } -      Changed = true; +      MadeIRChange = true;      }    } -  assert(WorklistMap.empty() && "Worklist empty, but map not?"); -     -  // Do an explicit clear, this shrinks the map if needed. -  WorklistMap.clear(); -  return Changed; +  Worklist.Zap(); +  return MadeIRChange;  }  bool InstCombiner::runOnFunction(Function &F) {    MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); +  Context = &F.getContext(); +   +   +  /// Builder - This is an IRBuilder that automatically inserts new +  /// instructions into the worklist when they are created. 
+  IRBuilder<true, ConstantFolder, InstCombineIRInserter>  +    TheBuilder(F.getContext(), ConstantFolder(F.getContext()), +               InstCombineIRInserter(Worklist)); +  Builder = &TheBuilder;    bool EverMadeChange = false; @@ -13126,6 +12942,8 @@ bool InstCombiner::runOnFunction(Function &F) {    unsigned Iteration = 0;    while (DoOneIteration(F, Iteration++))      EverMadeChange = true; +   +  Builder = 0;    return EverMadeChange;  } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index dee7bfba21dd..8b11edd891fd 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -19,6 +19,7 @@  #include "llvm/Analysis/ConstantFolding.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h"  #include "llvm/Target/TargetData.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/Statistic.h" @@ -26,13 +27,13 @@  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallSet.h"  #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" -#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h"  using namespace llvm;  STATISTIC(NumThreads, "Number of jumps threaded");  STATISTIC(NumFolds,   "Number of terminators folded"); +STATISTIC(NumDupes,   "Number of branch blocks duplicated to eliminate phi");  static cl::opt<unsigned>  Threshold("jump-threading-threshold",  @@ -56,7 +57,7 @@ namespace {    /// In this case, the unconditional branch at the end of the first if can be    /// revectored to the false side of the second if.    
/// -  class VISIBILITY_HIDDEN JumpThreading : public FunctionPass { +  class JumpThreading : public FunctionPass {      TargetData *TD;  #ifdef NDEBUG      SmallPtrSet<BasicBlock*, 16> LoopHeaders; @@ -68,15 +69,16 @@ namespace {      JumpThreading() : FunctionPass(&ID) {}      virtual void getAnalysisUsage(AnalysisUsage &AU) const { -      AU.addRequired<TargetData>();      }      bool runOnFunction(Function &F);      void FindLoopHeaders(Function &F);      bool ProcessBlock(BasicBlock *BB); -    bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB, -                    unsigned JumpThreadCost); +    bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB); +    bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, +                                          BasicBlock *PredBB); +      BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);      bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);      bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); @@ -99,8 +101,8 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }  /// runOnFunction - Top level algorithm.  ///  bool JumpThreading::runOnFunction(Function &F) { -  DOUT << "Jump threading on function '" << F.getNameStart() << "'\n"; -  TD = &getAnalysis<TargetData>(); +  DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n"); +  TD = getAnalysisIfAvailable<TargetData>();    FindLoopHeaders(F); @@ -119,8 +121,8 @@ bool JumpThreading::runOnFunction(Function &F) {        // edges which simplifies the CFG.        
if (pred_begin(BB) == pred_end(BB) &&            BB != &BB->getParent()->getEntryBlock()) { -        DOUT << "  JT: Deleting dead block '" << BB->getNameStart() -             << "' with terminator: " << *BB->getTerminator(); +        DEBUG(errs() << "  JT: Deleting dead block '" << BB->getName() +              << "' with terminator: " << *BB->getTerminator() << '\n');          LoopHeaders.erase(BB);          DeleteDeadBlock(BB);          Changed = true; @@ -134,6 +136,48 @@ bool JumpThreading::runOnFunction(Function &F) {    return EverChanged;  } +/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to +/// thread across it. +static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { +  /// Ignore PHI nodes, these will be flattened when duplication happens. +  BasicBlock::const_iterator I = BB->getFirstNonPHI(); +   +  // Sum up the cost of each instruction until we get to the terminator.  Don't +  // include the terminator because the copy won't include it. +  unsigned Size = 0; +  for (; !isa<TerminatorInst>(I); ++I) { +    // Debugger intrinsics don't incur code size. +    if (isa<DbgInfoIntrinsic>(I)) continue; +     +    // If this is a pointer->pointer bitcast, it is free. +    if (isa<BitCastInst>(I) && isa<PointerType>(I->getType())) +      continue; +     +    // All other instructions count for at least one unit. +    ++Size; +     +    // Calls are more expensive.  If they are non-intrinsic calls, we model them +    // as having cost of 4.  If they are a non-vector intrinsic, we model them +    // as having cost of 2 total, and if they are a vector intrinsic, we model +    // them as having cost 1. +    if (const CallInst *CI = dyn_cast<CallInst>(I)) { +      if (!isa<IntrinsicInst>(CI)) +        Size += 3; +      else if (!isa<VectorType>(CI->getType())) +        Size += 1; +    } +  } +   +  // Threading through a switch statement is particularly profitable.  
If this +  // block ends in a switch, decrease its cost to make it more likely to happen. +  if (isa<SwitchInst>(I)) +    Size = Size > 6 ? Size-6 : 0; +   +  return Size; +} + + +  /// FindLoopHeaders - We do not want jump threading to turn proper loop  /// structures into irreducible loops.  Doing this breaks up the loop nesting  /// hierarchy and pessimizes later transformations.  To prevent this from @@ -173,52 +217,34 @@ BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {    if (CommonPreds.size() == 1)      return CommonPreds[0]; -  DOUT << "  Factoring out " << CommonPreds.size() -       << " common predecessors.\n"; +  DEBUG(errs() << "  Factoring out " << CommonPreds.size() +        << " common predecessors.\n");    return SplitBlockPredecessors(PN->getParent(),                                  &CommonPreds[0], CommonPreds.size(),                                  ".thr_comm", this);  } -/// getJumpThreadDuplicationCost - Return the cost of duplicating this block to -/// thread across it. -static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) { -  /// Ignore PHI nodes, these will be flattened when duplication happens. -  BasicBlock::const_iterator I = BB->getFirstNonPHI(); - -  // Sum up the cost of each instruction until we get to the terminator.  Don't -  // include the terminator because the copy won't include it. -  unsigned Size = 0; -  for (; !isa<TerminatorInst>(I); ++I) { -    // Debugger intrinsics don't incur code size. -    if (isa<DbgInfoIntrinsic>(I)) continue; -     -    // If this is a pointer->pointer bitcast, it is free. -    if (isa<BitCastInst>(I) && isa<PointerType>(I->getType())) -      continue; -     -    // All other instructions count for at least one unit. -    ++Size; -     -    // Calls are more expensive.  If they are non-intrinsic calls, we model them -    // as having cost of 4.  
If they are a non-vector intrinsic, we model them -    // as having cost of 2 total, and if they are a vector intrinsic, we model -    // them as having cost 1. -    if (const CallInst *CI = dyn_cast<CallInst>(I)) { -      if (!isa<IntrinsicInst>(CI)) -        Size += 3; -      else if (!isa<VectorType>(CI->getType())) -        Size += 1; -    } +/// GetBestDestForBranchOnUndef - If we determine that the specified block ends +/// in an undefined jump, decide which block is best to revector to. +/// +/// Since we can pick an arbitrary destination, we pick the successor with the +/// fewest predecessors.  This should reduce the in-degree of the others. +/// +static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { +  TerminatorInst *BBTerm = BB->getTerminator(); +  unsigned MinSucc = 0; +  BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); +  // Compute the successor with the minimum number of predecessors. +  unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); +  for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { +    TestBB = BBTerm->getSuccessor(i); +    unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); +    if (NumPreds < MinNumPreds) +      MinSucc = i;    } -  // Threading through a switch statement is particularly profitable.  If this -  // block ends in a switch, decrease its cost to make it more likely to happen. -  if (isa<SwitchInst>(I)) -    Size = Size > 6 ? Size-6 : 0; -   -  return Size; +  return MinSucc;  }  /// ProcessBlock - If there are any predecessors whose control can be threaded @@ -262,39 +288,28 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {    // terminator to an unconditional branch.  This can occur due to threading in    // other blocks.    
if (isa<ConstantInt>(Condition)) { -    DOUT << "  In block '" << BB->getNameStart() -         << "' folding terminator: " << *BB->getTerminator(); +    DEBUG(errs() << "  In block '" << BB->getName() +          << "' folding terminator: " << *BB->getTerminator() << '\n');      ++NumFolds;      ConstantFoldTerminator(BB);      return true;    }    // If the terminator is branching on an undef, we can pick any of the -  // successors to branch to.  Since this is arbitrary, we pick the successor -  // with the fewest predecessors.  This should reduce the in-degree of the -  // others. +  // successors to branch to.  Let GetBestDestForJumpOnUndef decide.    if (isa<UndefValue>(Condition)) { -    TerminatorInst *BBTerm = BB->getTerminator(); -    unsigned MinSucc = 0; -    BasicBlock *TestBB = BBTerm->getSuccessor(MinSucc); -    // Compute the successor with the minimum number of predecessors. -    unsigned MinNumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); -    for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) { -      TestBB = BBTerm->getSuccessor(i); -      unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB)); -      if (NumPreds < MinNumPreds) -        MinSucc = i; -    } +    unsigned BestSucc = GetBestDestForJumpOnUndef(BB);      // Fold the branch/switch. 
+    TerminatorInst *BBTerm = BB->getTerminator();      for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) { -      if (i == MinSucc) continue; +      if (i == BestSucc) continue;        BBTerm->getSuccessor(i)->removePredecessor(BB);      } -    DOUT << "  In block '" << BB->getNameStart() -         << "' folding undef terminator: " << *BBTerm; -    BranchInst::Create(BBTerm->getSuccessor(MinSucc), BBTerm); +    DEBUG(errs() << "  In block '" << BB->getName() +          << "' folding undef terminator: " << *BBTerm << '\n'); +    BranchInst::Create(BBTerm->getSuccessor(BestSucc), BBTerm);      BBTerm->eraseFromParent();      return true;    } @@ -419,8 +434,8 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,    else if (PredBI->getSuccessor(0) != BB)      BranchDir = false;    else { -    DOUT << "  In block '" << PredBB->getNameStart() -         << "' folding terminator: " << *PredBB->getTerminator(); +    DEBUG(errs() << "  In block '" << PredBB->getName() +          << "' folding terminator: " << *PredBB->getTerminator() << '\n');      ++NumFolds;      ConstantFoldTerminator(PredBB);      return true; @@ -431,29 +446,24 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,    // If the dest block has one predecessor, just fix the branch condition to a    // constant and fold it.    
if (BB->getSinglePredecessor()) { -    DOUT << "  In block '" << BB->getNameStart() -         << "' folding condition to '" << BranchDir << "': " -         << *BB->getTerminator(); +    DEBUG(errs() << "  In block '" << BB->getName() +          << "' folding condition to '" << BranchDir << "': " +          << *BB->getTerminator() << '\n');      ++NumFolds; -    DestBI->setCondition(Context->getConstantInt(Type::Int1Ty, BranchDir)); +    Value *OldCond = DestBI->getCondition(); +    DestBI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()), +                                          BranchDir));      ConstantFoldTerminator(BB); +    RecursivelyDeleteTriviallyDeadInstructions(OldCond);      return true;    } -   -  // Otherwise we need to thread from PredBB to DestBB's successor which -  // involves code duplication.  Check to see if it is worth it. -  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); -  if (JumpThreadCost > Threshold) { -    DOUT << "  Not threading BB '" << BB->getNameStart() -         << "' - Cost is too high: " << JumpThreadCost << "\n"; -    return false; -  } +     // Next, figure out which successor we are threading to.    BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);    // Ok, try to thread it! -  return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); +  return ThreadEdge(BB, PredBB, SuccBB);  }  /// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that @@ -472,7 +482,6 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,    if (PredBB == DestBB)      return false; -      SwitchInst *PredSI = cast<SwitchInst>(PredBB->getTerminator());    SwitchInst *DestSI = cast<SwitchInst>(DestBB->getTerminator()); @@ -508,8 +517,8 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,        // Otherwise, we're safe to make the change.  Make sure that the edge from        // DestSI to DestSucc is not critical and has no PHI nodes. 
-      DOUT << "FORWARDING EDGE " << *DestVal << "   FROM: " << *PredSI; -      DOUT << "THROUGH: " << *DestSI; +      DEBUG(errs() << "FORWARDING EDGE " << *DestVal << "   FROM: " << *PredSI); +      DEBUG(errs() << "THROUGH: " << *DestSI);        // If the destination has PHI nodes, just split the edge for updating        // simplicity. @@ -564,7 +573,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {      // If the returned value is the load itself, replace with an undef. This can      // only happen in dead loops. -    if (AvailableVal == LI) AvailableVal = Context->getUndef(LI->getType()); +    if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());      LI->replaceAllUsesWith(AvailableVal);      LI->eraseFromParent();      return true; @@ -685,49 +694,74 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {  } -/// ProcessJumpOnPHI - We have a conditional branch of switch on a PHI node in +/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in  /// the current block.  See if there are any simplifications we can do based on  /// inputs to the phi node.  ///   bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) { -  // See if the phi node has any constant values.  If so, we can determine where -  // the corresponding predecessor will branch. -  ConstantInt *PredCst = 0; -  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) -    if ((PredCst = dyn_cast<ConstantInt>(PN->getIncomingValue(i)))) -      break; -   -  // If no incoming value has a constant, we don't know the destination of any -  // predecessors. -  if (PredCst == 0) -    return false; -   -  // See if the cost of duplicating this block is low enough.    
BasicBlock *BB = PN->getParent(); -  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); -  if (JumpThreadCost > Threshold) { -    DOUT << "  Not threading BB '" << BB->getNameStart() -         << "' - Cost is too high: " << JumpThreadCost << "\n"; -    return false; +   +  // See if the phi node has any constant integer or undef values.  If so, we +  // can determine where the corresponding predecessor will branch. +  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { +    Value *PredVal = PN->getIncomingValue(i); +     +    // Check to see if this input is a constant integer.  If so, the direction +    // of the branch is predictable. +    if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) { +      // Merge any common predecessors that will act the same. +      BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI); +       +      BasicBlock *SuccBB; +      if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) +        SuccBB = BI->getSuccessor(CI->isZero()); +      else { +        SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); +        SuccBB = SI->getSuccessor(SI->findCaseValue(CI)); +      } +       +      // Ok, try to thread it! +      return ThreadEdge(BB, PredBB, SuccBB); +    } +     +    // If the input is an undef, then it doesn't matter which way it will go. +    // Pick an arbitrary dest and thread the edge. +    if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) { +      // Merge any common predecessors that will act the same. +      BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV); +      BasicBlock *SuccBB = +        BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB)); +       +      // Ok, try to thread it! +      return ThreadEdge(BB, PredBB, SuccBB); +    }    } -  // If so, we can actually do this threading.  Merge any common predecessors -  // that will act the same. 
-  BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); +  // If the incoming values are all variables, we don't know the destination of +  // any predecessors.  However, if any of the predecessor blocks end in an +  // unconditional branch, we can *duplicate* the jump into that block in order +  // to further encourage jump threading and to eliminate cases where we have +  // branch on a phi of an icmp (branch on icmp is much better). + +  // We don't want to do this tranformation for switches, because we don't +  // really want to duplicate a switch. +  if (isa<SwitchInst>(BB->getTerminator())) +    return false; -  // Next, figure out which successor we are threading to. -  BasicBlock *SuccBB; -  if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) -    SuccBB = BI->getSuccessor(PredCst == Context->getConstantIntFalse()); -  else { -    SwitchInst *SI = cast<SwitchInst>(BB->getTerminator()); -    SuccBB = SI->getSuccessor(SI->findCaseValue(PredCst)); +  // Look for unconditional branch predecessors. +  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { +    BasicBlock *PredBB = PN->getIncomingBlock(i); +    if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator())) +      if (PredBr->isUnconditional() && +          // Try to duplicate BB into PredBB. +          DuplicateCondBranchOnPHIIntoPred(BB, PredBB)) +        return true;    } -   -  // Ok, try to thread it! -  return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); + +  return false;  } +  /// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch  /// whose condition is an AND/OR where one side is PN.  If PN has constant  /// operands that permit us to evaluate the condition for some operand, thread @@ -756,7 +790,8 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,    // We can only do the simplification for phi nodes of 'false' with AND or    // 'true' with OR.  See if we have any entries in the phi for this.    
unsigned PredNo = ~0U; -  ConstantInt *PredCst = Context->getConstantInt(Type::Int1Ty, !isAnd); +  ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()), +                                          !isAnd);    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {      if (PN->getIncomingValue(i) == PredCst) {        PredNo = i; @@ -768,14 +803,6 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,    if (PredNo == ~0U)      return false; -  // See if the cost of duplicating this block is low enough. -  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); -  if (JumpThreadCost > Threshold) { -    DOUT << "  Not threading BB '" << BB->getNameStart() -         << "' - Cost is too high: " << JumpThreadCost << "\n"; -    return false; -  } -    // If so, we can actually do this threading.  Merge any common predecessors    // that will act the same.    BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst); @@ -787,7 +814,7 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,    BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd);    // Ok, try to thread it! -  return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); +  return ThreadEdge(BB, PredBB, SuccBB);  }  /// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right @@ -795,15 +822,15 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,  /// result can not be determined, a null pointer is returned.  
static Constant *GetResultOfComparison(CmpInst::Predicate pred,                                         Value *LHS, Value *RHS, -                                       LLVMContext* Context) { +                                       LLVMContext &Context) {    if (Constant *CLHS = dyn_cast<Constant>(LHS))      if (Constant *CRHS = dyn_cast<Constant>(RHS)) -      return Context->getConstantExprCompare(pred, CLHS, CRHS); +      return ConstantExpr::getCompare(pred, CLHS, CRHS);    if (LHS == RHS)      if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))        return ICmpInst::isTrueWhenEqual(pred) ?  -                 Context->getConstantIntTrue() : Context->getConstantIntFalse(); +                 ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context);    return 0;  } @@ -829,7 +856,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {      PredVal = PN->getIncomingValue(i);      Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, -                                          RHS, Context); +                                          RHS, Cmp->getContext());      if (!Res) {        PredVal = 0;        continue; @@ -854,14 +881,6 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {    if (PredVal == 0)      return false; -  // See if the cost of duplicating this block is low enough. -  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); -  if (JumpThreadCost > Threshold) { -    DOUT << "  Not threading BB '" << BB->getNameStart() -         << "' - Cost is too high: " << JumpThreadCost << "\n"; -    return false; -  } -      // If so, we can actually do this threading.  Merge any common predecessors    // that will act the same.    
BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal); @@ -870,58 +889,77 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {    BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);    // Ok, try to thread it! -  return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost); +  return ThreadEdge(BB, PredBB, SuccBB);  } +/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new +/// predecessor to the PHIBB block.  If it has PHI nodes, add entries for +/// NewPred using the entries from OldPred (suitably mapped). +static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB, +                                            BasicBlock *OldPred, +                                            BasicBlock *NewPred, +                                     DenseMap<Instruction*, Value*> &ValueMap) { +  for (BasicBlock::iterator PNI = PHIBB->begin(); +       PHINode *PN = dyn_cast<PHINode>(PNI); ++PNI) { +    // Ok, we have a PHI node.  Figure out what the incoming value was for the +    // DestBlock. +    Value *IV = PN->getIncomingValueForBlock(OldPred); +     +    // Remap the value if necessary. +    if (Instruction *Inst = dyn_cast<Instruction>(IV)) { +      DenseMap<Instruction*, Value*>::iterator I = ValueMap.find(Inst); +      if (I != ValueMap.end()) +        IV = I->second; +    } +     +    PN->addIncoming(IV, NewPred); +  } +} +  /// ThreadEdge - We have decided that it is safe and profitable to thread an  /// edge from PredBB to SuccBB across BB.  Transform the IR to reflect this  /// change.  bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,  -                               BasicBlock *SuccBB, unsigned JumpThreadCost) { - +                               BasicBlock *SuccBB) {    // If threading to the same block as we come from, we would infinite loop.    
if (SuccBB == BB) { -    DOUT << "  Not threading across BB '" << BB->getNameStart() -         << "' - would thread to self!\n"; +    DEBUG(errs() << "  Not threading across BB '" << BB->getName() +          << "' - would thread to self!\n");      return false;    }    // If threading this would thread across a loop header, don't thread the edge.    // See the comments above FindLoopHeaders for justifications and caveats.    if (LoopHeaders.count(BB)) { -    DOUT << "  Not threading from '" << PredBB->getNameStart() -         << "' across loop header BB '" << BB->getNameStart() -         << "' to dest BB '" << SuccBB->getNameStart() -         << "' - it might create an irreducible loop!\n"; +    DEBUG(errs() << "  Not threading from '" << PredBB->getName() +          << "' across loop header BB '" << BB->getName() +          << "' to dest BB '" << SuccBB->getName() +          << "' - it might create an irreducible loop!\n");      return false;    } -  // And finally, do it! -  DOUT << "  Threading edge from '" << PredBB->getNameStart() << "' to '" -       << SuccBB->getNameStart() << "' with cost: " << JumpThreadCost -       << ", across block:\n    " -       << *BB << "\n"; -   -  // Jump Threading can not update SSA properties correctly if the values -  // defined in the duplicated block are used outside of the block itself.  For -  // this reason, we spill all values that are used outside of BB to the stack. -  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { -    if (!I->isUsedOutsideOfBlock(BB)) -      continue; -     -    // We found a use of I outside of BB.  Create a new stack slot to -    // break this inter-block usage pattern. -    DemoteRegToStack(*I); +  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB); +  if (JumpThreadCost > Threshold) { +    DEBUG(errs() << "  Not threading BB '" << BB->getName() +          << "' - Cost is too high: " << JumpThreadCost << "\n"); +    return false;    } -  +   +  // And finally, do it! 
+  DEBUG(errs() << "  Threading edge from '" << PredBB->getName() << "' to '" +        << SuccBB->getName() << "' with cost: " << JumpThreadCost +        << ", across block:\n    " +        << *BB << "\n"); +      // We are going to have to map operands from the original BB block to the new    // copy of the block 'NewBB'.  If there are PHI nodes in BB, evaluate them to    // account for entry from PredBB.    DenseMap<Instruction*, Value*> ValueMapping; -  BasicBlock *NewBB = -    BasicBlock::Create(BB->getName()+".thread", BB->getParent(), BB); +  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(),  +                                         BB->getName()+".thread",  +                                         BB->getParent(), BB);    NewBB->moveAfter(PredBB);    BasicBlock::iterator BI = BB->begin(); @@ -932,7 +970,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,    // mapping and using it to remap operands in the cloned instructions.    for (; !isa<TerminatorInst>(BI); ++BI) {      Instruction *New = BI->clone(); -    New->setName(BI->getNameStart()); +    New->setName(BI->getName());      NewBB->getInstList().push_back(New);      ValueMapping[BI] = New; @@ -951,21 +989,48 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,    // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the    // PHI nodes for NewBB now. -  for (BasicBlock::iterator PNI = SuccBB->begin(); isa<PHINode>(PNI); ++PNI) { -    PHINode *PN = cast<PHINode>(PNI); -    // Ok, we have a PHI node.  Figure out what the incoming value was for the -    // DestBlock. -    Value *IV = PN->getIncomingValueForBlock(BB); -     -    // Remap the value if necessary. 
-    if (Instruction *Inst = dyn_cast<Instruction>(IV)) { -      DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst); -      if (I != ValueMapping.end()) -        IV = I->second; +  AddPHINodeEntriesForMappedBlock(SuccBB, BB, NewBB, ValueMapping); +   +  // If there were values defined in BB that are used outside the block, then we +  // now have to update all uses of the value to use either the original value, +  // the cloned value, or some PHI derived value.  This can require arbitrary +  // PHI insertion, of which we are prepared to do, clean these up now. +  SSAUpdater SSAUpdate; +  SmallVector<Use*, 16> UsesToRename; +  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { +    // Scan all uses of this instruction to see if it is used outside of its +    // block, and if so, record them in UsesToRename. +    for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; +         ++UI) { +      Instruction *User = cast<Instruction>(*UI); +      if (PHINode *UserPN = dyn_cast<PHINode>(User)) { +        if (UserPN->getIncomingBlock(UI) == BB) +          continue; +      } else if (User->getParent() == BB) +        continue; +       +      UsesToRename.push_back(&UI.getUse());      } -    PN->addIncoming(IV, NewBB); +     +    // If there are no uses outside the block, we're done with this instruction. +    if (UsesToRename.empty()) +      continue; +     +    DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n"); + +    // We found a use of I outside of BB.  Rename all uses of I that are outside +    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks +    // with the two values we know. +    SSAUpdate.Initialize(I); +    SSAUpdate.AddAvailableValue(BB, I); +    SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); +     +    while (!UsesToRename.empty()) +      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); +    DEBUG(errs() << "\n");    } +      // Ok, NewBB is good to go.  
Update the terminator of PredBB to jump to    // NewBB instead of BB.  This eliminates predecessors from BB, which requires    // us to simplify any PHI nodes in BB. @@ -982,7 +1047,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,    BI = NewBB->begin();    for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {      Instruction *Inst = BI++; -    if (Constant *C = ConstantFoldInstruction(Inst, TD)) { +    if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {        Inst->replaceAllUsesWith(C);        Inst->eraseFromParent();        continue; @@ -995,3 +1060,120 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,    ++NumThreads;    return true;  } + +/// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch +/// to BB which contains an i1 PHI node and a conditional branch on that PHI. +/// If we can duplicate the contents of BB up into PredBB do so now, this +/// improves the odds that the branch will be on an analyzable instruction like +/// a compare. +bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, +                                                     BasicBlock *PredBB) { +  // If BB is a loop header, then duplicating this block outside the loop would +  // cause us to transform this into an irreducible loop, don't do this. +  // See the comments above FindLoopHeaders for justifications and caveats. +  if (LoopHeaders.count(BB)) { +    DEBUG(errs() << "  Not duplicating loop header '" << BB->getName() +          << "' into predecessor block '" << PredBB->getName() +          << "' - it might create an irreducible loop!\n"); +    return false; +  } +   +  unsigned DuplicationCost = getJumpThreadDuplicationCost(BB); +  if (DuplicationCost > Threshold) { +    DEBUG(errs() << "  Not duplicating BB '" << BB->getName() +          << "' - Cost is too high: " << DuplicationCost << "\n"); +    return false; +  } +   +  // Okay, we decided to do this!  
Clone all the instructions in BB onto the end +  // of PredBB. +  DEBUG(errs() << "  Duplicating block '" << BB->getName() << "' into end of '" +        << PredBB->getName() << "' to eliminate branch on phi.  Cost: " +        << DuplicationCost << " block is:" << *BB << "\n"); +   +  // We are going to have to map operands from the original BB block into the +  // PredBB block.  Evaluate PHI nodes in BB. +  DenseMap<Instruction*, Value*> ValueMapping; +   +  BasicBlock::iterator BI = BB->begin(); +  for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) +    ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); +   +  BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator()); +   +  // Clone the non-phi instructions of BB into PredBB, keeping track of the +  // mapping and using it to remap operands in the cloned instructions. +  for (; BI != BB->end(); ++BI) { +    Instruction *New = BI->clone(); +    New->setName(BI->getName()); +    PredBB->getInstList().insert(OldPredBranch, New); +    ValueMapping[BI] = New; +     +    // Remap operands to patch up intra-block references. +    for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) +      if (Instruction *Inst = dyn_cast<Instruction>(New->getOperand(i))) { +        DenseMap<Instruction*, Value*>::iterator I = ValueMapping.find(Inst); +        if (I != ValueMapping.end()) +          New->setOperand(i, I->second); +      } +  } +   +  // Check to see if the targets of the branch had PHI nodes. If so, we need to +  // add entries to the PHI nodes for branch from PredBB now. 
+  BranchInst *BBBranch = cast<BranchInst>(BB->getTerminator()); +  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(0), BB, PredBB, +                                  ValueMapping); +  AddPHINodeEntriesForMappedBlock(BBBranch->getSuccessor(1), BB, PredBB, +                                  ValueMapping); +   +  // If there were values defined in BB that are used outside the block, then we +  // now have to update all uses of the value to use either the original value, +  // the cloned value, or some PHI derived value.  This can require arbitrary +  // PHI insertion, of which we are prepared to do, clean these up now. +  SSAUpdater SSAUpdate; +  SmallVector<Use*, 16> UsesToRename; +  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { +    // Scan all uses of this instruction to see if it is used outside of its +    // block, and if so, record them in UsesToRename. +    for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; +         ++UI) { +      Instruction *User = cast<Instruction>(*UI); +      if (PHINode *UserPN = dyn_cast<PHINode>(User)) { +        if (UserPN->getIncomingBlock(UI) == BB) +          continue; +      } else if (User->getParent() == BB) +        continue; +       +      UsesToRename.push_back(&UI.getUse()); +    } +     +    // If there are no uses outside the block, we're done with this instruction. +    if (UsesToRename.empty()) +      continue; +     +    DEBUG(errs() << "JT: Renaming non-local uses of: " << *I << "\n"); +     +    // We found a use of I outside of BB.  Rename all uses of I that are outside +    // its block to be uses of the appropriate PHI node etc.  See ValuesInBlocks +    // with the two values we know. 
+    SSAUpdate.Initialize(I); +    SSAUpdate.AddAvailableValue(BB, I); +    SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); +     +    while (!UsesToRename.empty()) +      SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); +    DEBUG(errs() << "\n"); +  } +   +  // PredBB no longer jumps to BB, remove entries in the PHI node for the edge +  // that we nuked. +  BB->removePredecessor(PredBB); +   +  // Remove the unconditional branch at the end of the PredBB block. +  OldPredBranch->eraseFromParent(); +   +  ++NumDupes; +  return true; +} + + diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index d6daeca1128c..756fbf3e7bd5 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -35,8 +35,8 @@  #include "llvm/Transforms/Scalar.h"  #include "llvm/Constants.h"  #include "llvm/DerivedTypes.h" +#include "llvm/IntrinsicInst.h"  #include "llvm/Instructions.h" -#include "llvm/LLVMContext.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/LoopPass.h" @@ -46,8 +46,8 @@  #include "llvm/Analysis/ScalarEvolution.h"  #include "llvm/Transforms/Utils/PromoteMemToReg.h"  #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Support/Debug.h"  #include "llvm/ADT/Statistic.h"  #include <algorithm> @@ -73,7 +73,7 @@ EnableLICMConstantMotion("enable-licm-constant-variables", cl::Hidden,                                    "global variables"));  namespace { -  struct VISIBILITY_HIDDEN LICM : public LoopPass { +  struct LICM : public LoopPass {      static char ID; // Pass identification, replacement for typeid      LICM() : LoopPass(&ID) {} @@ -91,6 +91,7 @@ namespace {        AU.addRequired<AliasAnalysis>();        AU.addPreserved<ScalarEvolution>();        AU.addPreserved<DominanceFrontier>(); +      AU.addPreservedID(LoopSimplifyID);      }      bool doFinalization() { 
@@ -338,7 +339,6 @@ void LICM::SinkRegion(DomTreeNode *N) {    }  } -  /// HoistRegion - Walk the specified region of the CFG (defined by all blocks  /// dominated by the specified block, and that are in the current loop) in depth  /// first order w.r.t the DominatorTree.  This allows us to visit definitions @@ -389,9 +389,13 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {      // Don't hoist loads which have may-aliased stores in loop.      unsigned Size = 0;      if (LI->getType()->isSized()) -      Size = AA->getTargetData().getTypeStoreSize(LI->getType()); +      Size = AA->getTypeStoreSize(LI->getType());      return !pointerInvalidatedByLoop(LI->getOperand(0), Size);    } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { +    if (isa<DbgStopPointInst>(CI)) { +      // Don't hoist/sink dbgstoppoints, we handle them separately +      return false; +    }      // Handle obvious cases efficiently.      AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI);      if (Behavior == AliasAnalysis::DoesNotAccessMemory) @@ -465,7 +469,7 @@ bool LICM::isLoopInvariantInst(Instruction &I) {  /// position, and may either delete it or move it to outside of the loop.  ///  void LICM::sink(Instruction &I) { -  DOUT << "LICM sinking instruction: " << I; +  DEBUG(errs() << "LICM sinking instruction: " << I);    SmallVector<BasicBlock*, 8> ExitBlocks;    CurLoop->getExitBlocks(ExitBlocks); @@ -482,22 +486,27 @@ void LICM::sink(Instruction &I) {      if (!isExitBlockDominatedByBlockInLoop(ExitBlocks[0], I.getParent())) {        // Instruction is not used, just delete it.        CurAST->deleteValue(&I); -      if (!I.use_empty())  // If I has users in unreachable blocks, eliminate. -        I.replaceAllUsesWith(Context->getUndef(I.getType())); +      // If I has users in unreachable blocks, eliminate. +      // If I is not void type then replaceAllUsesWith undef. +      // This allows ValueHandlers and custom metadata to adjust itself. 
+      if (!I.getType()->isVoidTy()) +        I.replaceAllUsesWith(UndefValue::get(I.getType()));        I.eraseFromParent();      } else {        // Move the instruction to the start of the exit block, after any PHI        // nodes in it.        I.removeFromParent(); -        BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI();        ExitBlocks[0]->getInstList().insert(InsertPt, &I);      }    } else if (ExitBlocks.empty()) {      // The instruction is actually dead if there ARE NO exit blocks.      CurAST->deleteValue(&I); -    if (!I.use_empty())  // If I has users in unreachable blocks, eliminate. -      I.replaceAllUsesWith(Context->getUndef(I.getType())); +    // If I has users in unreachable blocks, eliminate. +    // If I is not void type then replaceAllUsesWith undef. +    // This allows ValueHandlers and custom metadata to adjust itself. +    if (!I.getType()->isVoidTy()) +      I.replaceAllUsesWith(UndefValue::get(I.getType()));      I.eraseFromParent();    } else {      // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to @@ -507,7 +516,7 @@ void LICM::sink(Instruction &I) {      // Firstly, we create a stack object to hold the value...      AllocaInst *AI = 0; -    if (I.getType() != Type::VoidTy) { +    if (!I.getType()->isVoidTy()) {        AI = new AllocaInst(I.getType(), 0, I.getName(),                            I.getParent()->getParent()->getEntryBlock().begin());        CurAST->add(AI); @@ -593,7 +602,7 @@ void LICM::sink(Instruction &I) {      if (AI) {        std::vector<AllocaInst*> Allocas;        Allocas.push_back(AI); -      PromoteMemToReg(Allocas, *DT, *DF, CurAST); +      PromoteMemToReg(Allocas, *DT, *DF, AI->getContext(), CurAST);      }    }  } @@ -602,7 +611,8 @@ void LICM::sink(Instruction &I) {  /// that is safe to hoist, this instruction is called to do the dirty work.  
///  void LICM::hoist(Instruction &I) { -  DOUT << "LICM hoisting to " << Preheader->getName() << ": " << I; +  DEBUG(errs() << "LICM hoisting to " << Preheader->getName() << ": " +        << I << "\n");    // Remove the instruction from its current basic block... but don't delete the    // instruction. @@ -623,7 +633,8 @@ void LICM::hoist(Instruction &I) {  ///  bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {    // If it is not a trapping instruction, it is always safe to hoist. -  if (!Inst.isTrapping()) return true; +  if (Inst.isSafeToSpeculativelyExecute()) +    return true;    // Otherwise we have to check to make sure that the instruction dominates all    // of the exit blocks.  If it doesn't, then there is a path out of the loop @@ -635,12 +646,6 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {    if (Inst.getParent() == CurLoop->getHeader())      return true; -  // It's always safe to load from a global or alloca. -  if (isa<LoadInst>(Inst)) -    if (isa<AllocationInst>(Inst.getOperand(0)) || -        isa<GlobalVariable>(Inst.getOperand(0))) -      return true; -    // Get the exit blocks for the current loop.    
SmallVector<BasicBlock*, 8> ExitBlocks;    CurLoop->getExitBlocks(ExitBlocks); @@ -773,7 +778,7 @@ void LICM::PromoteValuesInLoop() {    PromotedAllocas.reserve(PromotedValues.size());    for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i)      PromotedAllocas.push_back(PromotedValues[i].first); -  PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); +  PromoteMemToReg(PromotedAllocas, *DT, *DF, Preheader->getContext(), CurAST);  }  /// FindPromotableValuesInLoop - Check the current loop for stores to definite @@ -862,7 +867,7 @@ void LICM::FindPromotableValuesInLoop(      for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)        ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI)); -    DOUT << "LICM: Promoting value: " << *V << "\n"; +    DEBUG(errs() << "LICM: Promoting value: " << *V << "\n");    }  } diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 302cdec2ba4a..5f93756a05c0 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -15,19 +15,17 @@  //===----------------------------------------------------------------------===//  #define DEBUG_TYPE "loop-delete" -  #include "llvm/Transforms/Scalar.h"  #include "llvm/Analysis/LoopPass.h"  #include "llvm/Analysis/ScalarEvolution.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/SmallVector.h" -  using namespace llvm;  STATISTIC(NumDeleted, "Number of loops deleted");  namespace { -  class VISIBILITY_HIDDEN LoopDeletion : public LoopPass { +  class LoopDeletion : public LoopPass {    public:      static char ID; // Pass ID, replacement for typeid      LoopDeletion() : LoopPass(&ID) {} @@ -38,9 +36,9 @@ namespace {      bool SingleDominatingExit(Loop* L,                                SmallVector<BasicBlock*, 4>& exitingBlocks);      bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks, -                    SmallVector<BasicBlock*, 4>& exitBlocks); -    bool 
IsLoopInvariantInst(Instruction *I, Loop* L); -     +                    SmallVector<BasicBlock*, 4>& exitBlocks, +                    bool &Changed, BasicBlock *Preheader); +      virtual void getAnalysisUsage(AnalysisUsage& AU) const {        AU.addRequired<ScalarEvolution>();        AU.addRequired<DominatorTree>(); @@ -84,32 +82,13 @@ bool LoopDeletion::SingleDominatingExit(Loop* L,    return DT.dominates(exitingBlocks[0], latch);  } -/// IsLoopInvariantInst - Checks if an instruction is invariant with respect to -/// a loop, which is defined as being true if all of its operands are defined -/// outside of the loop.  These instructions can be hoisted out of the loop -/// if their results are needed.  This could be made more aggressive by -/// recursively checking the operands for invariance, but it's not clear that -/// it's worth it. -bool LoopDeletion::IsLoopInvariantInst(Instruction *I, Loop* L)  { -  // PHI nodes are not loop invariant if defined in  the loop. -  if (isa<PHINode>(I) && L->contains(I->getParent())) -    return false; -     -  // The instruction is loop invariant if all of its operands are loop-invariant -  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) -    if (!L->isLoopInvariant(I->getOperand(i))) -      return false; -   -  // If we got this far, the instruction is loop invariant! -  return true; -} -  /// IsLoopDead - Determined if a loop is dead.  This assumes that we've already  /// checked for unique exit and exiting blocks, and that the code is in LCSSA  /// form.  
bool LoopDeletion::IsLoopDead(Loop* L,                                SmallVector<BasicBlock*, 4>& exitingBlocks, -                              SmallVector<BasicBlock*, 4>& exitBlocks) { +                              SmallVector<BasicBlock*, 4>& exitBlocks, +                              bool &Changed, BasicBlock *Preheader) {    BasicBlock* exitingBlock = exitingBlocks[0];    BasicBlock* exitBlock = exitBlocks[0]; @@ -122,7 +101,7 @@ bool LoopDeletion::IsLoopDead(Loop* L,    while (PHINode* P = dyn_cast<PHINode>(BI)) {      Value* incoming = P->getIncomingValueForBlock(exitingBlock);      if (Instruction* I = dyn_cast<Instruction>(incoming)) -      if (!IsLoopInvariantInst(I, L)) +      if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))          return false;      BI++; @@ -181,15 +160,16 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {      return false;    // Finally, we have to check that the loop really is dead. -  if (!IsLoopDead(L, exitingBlocks, exitBlocks)) -    return false; +  bool Changed = false; +  if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) +    return Changed;    // Don't remove loops for which we can't solve the trip count.    // They could be infinite, in which case we'd be changing program behavior.    ScalarEvolution& SE = getAnalysis<ScalarEvolution>(); -  const SCEV* S = SE.getBackedgeTakenCount(L); +  const SCEV *S = SE.getBackedgeTakenCount(L);    if (isa<SCEVCouldNotCompute>(S)) -    return false; +    return Changed;    // Now that we know the removal is safe, remove the loop by changing the    // branch from the preheader to go to the single exit block.   @@ -199,18 +179,12 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {    // Because we're deleting a large chunk of code at once, the sequence in which    // we remove things is very important to avoid invalidation issues.  Don't    // mess with this unless you have good reason and know what you're doing. 
-   -  // Move simple loop-invariant expressions out of the loop, since they -  // might be needed by the exit phis. -  for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); -       LI != LE; ++LI) -    for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); -         BI != BE; ) { -      Instruction* I = BI++; -      if (!I->use_empty() && IsLoopInvariantInst(I, L)) -        I->moveBefore(preheader->getTerminator()); -    } -   + +  // Tell ScalarEvolution that the loop is deleted. Do this before +  // deleting the loop so that ScalarEvolution can look at the loop +  // to determine what it needs to clean up. +  SE.forgetLoopBackedgeTakenCount(L); +    // Connect the preheader directly to the exit block.    TerminatorInst* TI = preheader->getTerminator();    TI->replaceUsesOfWith(L->getHeader(), exitBlock); @@ -248,11 +222,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {      (*LI)->dropAllReferences();    } -  // Tell ScalarEvolution that the loop is deleted. Do this before -  // deleting the loop so that ScalarEvolution can look at the loop -  // to determine what it needs to clean up. -  SE.forgetLoopBackedgeTakenCount(L); -    // Erase the instructions and the blocks without having to worry    // about ordering because we already dropped the references.    // NOTE: This iteration is safe because erasing the block does not remove its @@ -273,8 +242,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {    // The last step is to inform the loop pass manager that we've    // eliminated this loop.    
LPM.deleteLoopFromQueue(L); +  Changed = true;    NumDeleted++; -  return true; +  return Changed;  } diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp index 38e3a8b7af70..5f9d3703da99 100644 --- a/lib/Transforms/Scalar/LoopIndexSplit.cpp +++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp @@ -51,7 +51,6 @@  //===----------------------------------------------------------------------===//  #define DEBUG_TYPE "loop-index-split" -  #include "llvm/Transforms/Scalar.h"  #include "llvm/IntrinsicInst.h"  #include "llvm/LLVMContext.h" @@ -61,7 +60,6 @@  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Compiler.h"  #include "llvm/ADT/DepthFirstIterator.h"  #include "llvm/ADT/Statistic.h" @@ -73,8 +71,7 @@ STATISTIC(NumRestrictBounds, "Number of loop iteration space restricted");  namespace { -  class VISIBILITY_HIDDEN LoopIndexSplit : public LoopPass { - +  class LoopIndexSplit : public LoopPass {    public:      static char ID; // Pass ID, replacement for typeid      LoopIndexSplit() : LoopPass(&ID) {} @@ -294,31 +291,33 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) {  // Return V+1  static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt,  -                         LLVMContext* Context) { -  Constant *One = Context->getConstantInt(V->getType(), 1, Sign); +                         LLVMContext &Context) { +  Constant *One = ConstantInt::get(V->getType(), 1, Sign);    return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);  }  // Return V-1  static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt, -                          LLVMContext* Context) { -  Constant *One = Context->getConstantInt(V->getType(), 1, Sign); +                          LLVMContext &Context) { +  Constant *One = ConstantInt::get(V->getType(), 1, Sign);    return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);  }  // 
Return min(V1, V1)  static Value *getMin(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) { -  Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, -                          V1, V2, "lsp", InsertPt); +  Value *C = new ICmpInst(InsertPt, +                          Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, +                          V1, V2, "lsp");    return SelectInst::Create(C, V1, V2, "lsp", InsertPt);  }  // Return max(V1, V2)  static Value *getMax(Value *V1, Value *V2, bool Sign, Instruction *InsertPt) { -  Value *C = new ICmpInst(Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, -                          V1, V2, "lsp", InsertPt); +  Value *C = new ICmpInst(InsertPt,  +                          Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, +                          V1, V2, "lsp");    return SelectInst::Create(C, V2, V1, "lsp", InsertPt);  } @@ -427,15 +426,15 @@ bool LoopIndexSplit::processOneIterationLoop() {    //      c1 = icmp uge i32 SplitValue, StartValue    //      c2 = icmp ult i32 SplitValue, ExitValue    //      and i32 c1, c2  -  Instruction *C1 = new ICmpInst(ExitCondition->isSignedPredicate() ?  +  Instruction *C1 = new ICmpInst(BR, ExitCondition->isSignedPredicate() ?                                    
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, -                                 SplitValue, StartValue, "lisplit", BR); +                                 SplitValue, StartValue, "lisplit");    CmpInst::Predicate C2P  = ExitCondition->getPredicate();    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); -  if (LatchBR->getOperand(0) != Header) +  if (LatchBR->getOperand(1) != Header)      C2P = CmpInst::getInversePredicate(C2P); -  Instruction *C2 = new ICmpInst(C2P, SplitValue, ExitValue, "lisplit", BR); +  Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit");    Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);    SplitCondition->replaceAllUsesWith(NSplitCond); @@ -491,6 +490,8 @@ bool LoopIndexSplit::restrictLoopBound(ICmpInst &Op) {      EBR->setSuccessor(1, T);    } +  LLVMContext &Context = Op.getContext(); +    // New upper and lower bounds.    Value *NLB = NULL;    Value *NUB = NULL; @@ -698,7 +699,8 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,           E = df_end(DN); DI != E; ++DI) {      BasicBlock *BB = DI->getBlock();      WorkList.push_back(BB); -    BB->replaceAllUsesWith(UndefValue::get(Type::LabelTy)); +    BB->replaceAllUsesWith(UndefValue::get( +                                       Type::getLabelTy(DeadBB->getContext())));    }    while (!WorkList.empty()) { @@ -877,6 +879,8 @@ bool LoopIndexSplit::splitLoop() {    BasicBlock *ExitingBlock = ExitCondition->getParent();    if (!cleanBlock(ExitingBlock)) return false; +  LLVMContext &Context = Header->getContext(); +    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();         I != E; ++I) {      BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator()); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 1f7892ad1015..70c69bb1dae0 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -32,7 +32,7 @@ using 
namespace llvm;  STATISTIC(NumRotated, "Number of loops rotated");  namespace { -  class VISIBILITY_HIDDEN RenameData { +  class RenameData {    public:      RenameData(Instruction *O, Value *P, Instruction *H)         : Original(O), PreHeader(P), Header(H) { } @@ -42,8 +42,7 @@ namespace {      Instruction *Header; // New header replacement    }; -  class VISIBILITY_HIDDEN LoopRotate : public LoopPass { - +  class LoopRotate : public LoopPass {    public:      static char ID; // Pass ID, replacement for typeid      LoopRotate() : LoopPass(&ID) {} @@ -178,6 +177,11 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {    // Now, this loop is suitable for rotation. +  // Anything ScalarEvolution may know about this loop or the PHI nodes +  // in its header will soon be invalidated. +  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) +    SE->forgetLoopBackedgeTakenCount(L); +    // Find new Loop header. NewHeader is a Header's one and only successor    // that is inside loop.  Header's other successor is outside the    // loop.  Otherwise loop is not suitable for rotation. @@ -435,7 +439,8 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {    // Right now original pre-header has two successors, new header and    // exit block. Insert new block between original pre-header and    // new header such that loop's new pre-header has only one successor. 
-  BasicBlock *NewPreHeader = BasicBlock::Create("bb.nph", +  BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(), +                                                "bb.nph",                                                  OrigHeader->getParent(),                                                   NewHeader);    LoopInfo &LI = LPM.getAnalysis<LoopInfo>(); @@ -511,26 +516,30 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {        DF->addBasicBlock(L->getHeader(), LatchSet);      } -    // If a loop block dominates new loop latch then its frontier is -    // new header and Exit. +    // If a loop block dominates new loop latch then add to its frontiers +    // new header and Exit and remove new latch (which is equal to original +    // header).      BasicBlock *NewLatch = L->getLoopLatch(); -    DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>(); -    for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); -         BI != BE; ++BI) { -      BasicBlock *B = *BI; -      if (DT->dominates(B, NewLatch)) { -        DominanceFrontier::iterator BDFI = DF->find(B); -        if (BDFI != DF->end()) { -          DominanceFrontier::DomSetType &BSet = BDFI->second; -          BSet = BDFI->second; -          BSet.clear(); -          BSet.insert(L->getHeader()); -          BSet.insert(Exit); -        } else { -          DominanceFrontier::DomSetType BSet; -          BSet.insert(L->getHeader()); -          BSet.insert(Exit); -          DF->addBasicBlock(B, BSet); + +    assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader"); + +    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { +      for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); +           BI != BE; ++BI) { +        BasicBlock *B = *BI; +        if (DT->dominates(B, NewLatch)) { +          DominanceFrontier::iterator BDFI = DF->find(B); +          if (BDFI != DF->end()) { +            DominanceFrontier::DomSetType 
&BSet = BDFI->second; +            BSet.erase(NewLatch); +            BSet.insert(L->getHeader()); +            BSet.insert(Exit); +          } else { +            DominanceFrontier::DomSetType BSet; +            BSet.insert(L->getHeader()); +            BSet.insert(Exit); +            DF->addBasicBlock(B, BSet); +          }          }        }      } @@ -538,22 +547,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {    // Preserve canonical loop form, which means Exit block should    // have only one predecessor. -  BasicBlock *NExit = SplitEdge(L->getLoopLatch(), Exit, this); - -  // Preserve LCSSA. -  for (BasicBlock::iterator I = Exit->begin(); -       (PN = dyn_cast<PHINode>(I)); ++I) { -    unsigned N = PN->getNumIncomingValues(); -    for (unsigned index = 0; index != N; ++index) -      if (PN->getIncomingBlock(index) == NExit) { -        PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName(), -                                         NExit->begin()); -        NewPN->addIncoming(PN->getIncomingValue(index), L->getLoopLatch()); -        PN->setIncomingValue(index, NewPN); -        PN->setIncomingBlock(index, NExit); -        break; -      } -  } +  SplitEdge(L->getLoopLatch(), Exit, this);    assert(NewHeader && L->getHeader() == NewHeader &&           "Invalid loop header after loop rotation"); diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 046fed3d7157..d8f6cc18a1e9 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -24,7 +24,6 @@  #include "llvm/Constants.h"  #include "llvm/Instructions.h"  #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h"  #include "llvm/Type.h"  #include "llvm/DerivedTypes.h"  #include "llvm/Analysis/Dominators.h" @@ -38,9 +37,9 @@  #include "llvm/ADT/Statistic.h"  #include "llvm/Support/CFG.h"  #include "llvm/Support/Debug.h" -#include "llvm/Support/Compiler.h"  #include 
"llvm/Support/CommandLine.h"  #include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetLowering.h"  #include <algorithm>  using namespace llvm; @@ -64,26 +63,26 @@ namespace {    /// IVInfo - This structure keeps track of one IV expression inserted during    /// StrengthReduceStridedIVUsers. It contains the stride, the common base, as    /// well as the PHI node and increment value created for rewrite. -  struct VISIBILITY_HIDDEN IVExpr { -    const SCEV*  Stride; -    const SCEV*  Base; +  struct IVExpr { +    const SCEV *Stride; +    const SCEV *Base;      PHINode    *PHI; -    IVExpr(const SCEV* const stride, const SCEV* const base, PHINode *phi) +    IVExpr(const SCEV *const stride, const SCEV *const base, PHINode *phi)        : Stride(stride), Base(base), PHI(phi) {}    };    /// IVsOfOneStride - This structure keeps track of all IV expression inserted    /// during StrengthReduceStridedIVUsers for a particular stride of the IV. -  struct VISIBILITY_HIDDEN IVsOfOneStride { +  struct IVsOfOneStride {      std::vector<IVExpr> IVs; -    void addIV(const SCEV* const Stride, const SCEV* const Base, PHINode *PHI) { +    void addIV(const SCEV *const Stride, const SCEV *const Base, PHINode *PHI) {        IVs.push_back(IVExpr(Stride, Base, PHI));      }    }; -  class VISIBILITY_HIDDEN LoopStrengthReduce : public LoopPass { +  class LoopStrengthReduce : public LoopPass {      IVUsers *IU;      LoopInfo *LI;      DominatorTree *DT; @@ -92,11 +91,11 @@ namespace {      /// IVsByStride - Keep track of all IVs that have been inserted for a      /// particular stride. -    std::map<const SCEV*, IVsOfOneStride> IVsByStride; +    std::map<const SCEV *, IVsOfOneStride> IVsByStride;      /// StrideNoReuse - Keep track of all the strides whose ivs cannot be      /// reused (nor should they be rewritten to reuse other strides). 
-    SmallSet<const SCEV*, 4> StrideNoReuse; +    SmallSet<const SCEV *, 4> StrideNoReuse;      /// DeadInsts - Keep track of instructions we may have made dead, so that      /// we can remove them after we are done working. @@ -134,7 +133,7 @@ namespace {    private:      ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,                                    IVStrideUse* &CondUse, -                                  const SCEV* const *  &CondStride); +                                  const SCEV *const *  &CondStride);      void OptimizeIndvars(Loop *L);      void OptimizeLoopCountIV(Loop *L); @@ -150,16 +149,16 @@ namespace {                            IVStrideUse* &CondUse);      bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, -                           const SCEV* const * &CondStride); +                           const SCEV *const * &CondStride);      bool RequiresTypeConversion(const Type *Ty, const Type *NewTy); -    const SCEV* CheckForIVReuse(bool, bool, bool, const SCEV* const&, +    const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&,                               IVExpr&, const Type*,                               const std::vector<BasedUser>& UsersToProcess);      bool ValidScale(bool, int64_t,                      const std::vector<BasedUser>& UsersToProcess);      bool ValidOffset(bool, int64_t, int64_t,                       const std::vector<BasedUser>& UsersToProcess); -    const SCEV* CollectIVUsers(const SCEV* const &Stride, +    const SCEV *CollectIVUsers(const SCEV *const &Stride,                                IVUsersOfOneStride &Uses,                                Loop *L,                                bool &AllUsesAreAddresses, @@ -169,11 +168,11 @@ namespace {                                  const std::vector<BasedUser> &UsersToProcess,                                  const Loop *L,                                  bool AllUsesAreAddresses, -                                const SCEV* Stride); +              
                  const SCEV *Stride);      void PrepareToStrengthReduceFully(                               std::vector<BasedUser> &UsersToProcess, -                             const SCEV* Stride, -                             const SCEV* CommonExprs, +                             const SCEV *Stride, +                             const SCEV *CommonExprs,                               const Loop *L,                               SCEVExpander &PreheaderRewriter);      void PrepareToStrengthReduceFromSmallerStride( @@ -183,13 +182,13 @@ namespace {                                           Instruction *PreInsertPt);      void PrepareToStrengthReduceWithNewPhi(                                    std::vector<BasedUser> &UsersToProcess, -                                  const SCEV* Stride, -                                  const SCEV* CommonExprs, +                                  const SCEV *Stride, +                                  const SCEV *CommonExprs,                                    Value *CommonBaseV,                                    Instruction *IVIncInsertPt,                                    const Loop *L,                                    SCEVExpander &PreheaderRewriter); -    void StrengthReduceStridedIVUsers(const SCEV* const &Stride, +    void StrengthReduceStridedIVUsers(const SCEV *const &Stride,                                        IVUsersOfOneStride &Uses,                                        Loop *L);      void DeleteTriviallyDeadInstructions(); @@ -233,7 +232,7 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {  /// containsAddRecFromDifferentLoop - Determine whether expression S involves a   /// subexpression that is an AddRec from a loop other than L.  An outer loop   /// of L is OK, but not an inner loop nor a disjoint loop. -static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) { +static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {    // This is very common, put it first.    
if (isa<SCEVConstant>(S))      return false; @@ -248,7 +247,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV* S, Loop *L) {        if (newLoop == L)          return false;        // if newLoop is an outer loop of L, this is OK. -      if (!LoopInfoBase<BasicBlock>::isNotAlreadyContainedIn(L, newLoop)) +      if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop))          return false;      }      return true; @@ -328,7 +327,7 @@ namespace {      /// this use.  As the use is processed, information gets moved from this      /// field to the Imm field (below).  BasedUser values are sorted by this      /// field. -    const SCEV* Base; +    const SCEV *Base;      /// Inst - The instruction using the induction variable.      Instruction *Inst; @@ -341,7 +340,7 @@ namespace {      /// before Inst, because it will be folded into the imm field of the      /// instruction.  This is also sometimes used for loop-variant values that      /// must be added inside the loop. -    const SCEV* Imm; +    const SCEV *Imm;      /// Phi - The induction variable that performs the striding that      /// should be used for this user. @@ -363,13 +362,13 @@ namespace {      // Once we rewrite the code to insert the new IVs we want, update the      // operands of Inst to use the new expression 'NewBase', with 'Imm' added      // to it. 
-    void RewriteInstructionToUseNewBase(const SCEV* const &NewBase, +    void RewriteInstructionToUseNewBase(const SCEV *const &NewBase,                                          Instruction *InsertPt,                                         SCEVExpander &Rewriter, Loop *L, Pass *P,                                          LoopInfo &LI,                                          SmallVectorImpl<WeakVH> &DeadInsts); -    Value *InsertCodeForBaseAtPosition(const SCEV* const &NewBase,  +    Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,                                          const Type *Ty,                                         SCEVExpander &Rewriter,                                         Instruction *IP, Loop *L, @@ -379,12 +378,12 @@ namespace {  }  void BasedUser::dump() const { -  cerr << " Base=" << *Base; -  cerr << " Imm=" << *Imm; -  cerr << "   Inst: " << *Inst; +  errs() << " Base=" << *Base; +  errs() << " Imm=" << *Imm; +  errs() << "   Inst: " << *Inst;  } -Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,  +Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,                                                 const Type *Ty,                                                SCEVExpander &Rewriter,                                                Instruction *IP, Loop *L, @@ -408,7 +407,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,    Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt); -  const SCEV* NewValSCEV = SE->getUnknown(Base); +  const SCEV *NewValSCEV = SE->getUnknown(Base);    // Always emit the immediate into the same block as the user.    NewValSCEV = SE->getAddExpr(NewValSCEV, Imm); @@ -423,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,  // value of NewBase in the case that it's a diffferent instruction from  // the PHI that NewBase is computed from, or null otherwise.  
// -void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase, +void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,                                                 Instruction *NewBasePt,                                        SCEVExpander &Rewriter, Loop *L, Pass *P,                                        LoopInfo &LI, @@ -460,9 +459,10 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,      // Replace the use of the operand Value with the new Phi we just created.      Inst->replaceUsesOfWith(OperandValToReplace, NewVal); -    DOUT << "      Replacing with "; -    DEBUG(WriteAsOperand(*DOUT, NewVal, /*PrintType=*/false)); -    DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n"; +    DEBUG(errs() << "      Replacing with "); +    DEBUG(WriteAsOperand(errs(), NewVal, /*PrintType=*/false)); +    DEBUG(errs() << ", which has value " << *NewBase << " plus IMM " +                 << *Imm << "\n");      return;    } @@ -483,43 +483,45 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,        // loop because multiple copies sometimes do useful sinking of code in        // that case(?).        Instruction *OldLoc = dyn_cast<Instruction>(OperandValToReplace); +      BasicBlock *PHIPred = PN->getIncomingBlock(i);        if (L->contains(OldLoc->getParent())) {          // If this is a critical edge, split the edge so that we do not insert          // the code on all predecessor/successor paths.  We do this unless this          // is the canonical backedge for this loop, as this can make some          // inserted code be in an illegal position. -        BasicBlock *PHIPred = PN->getIncomingBlock(i);          if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&              (PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {            // First step, split the critical edge. 
-          SplitCriticalEdge(PHIPred, PN->getParent(), P, false); +          BasicBlock *NewBB = SplitCriticalEdge(PHIPred, PN->getParent(), +                                                P, false);            // Next step: move the basic block.  In particular, if the PHI node            // is outside of the loop, and PredTI is in the loop, we want to            // move the block to be immediately before the PHI block, not            // immediately after PredTI. -          if (L->contains(PHIPred) && !L->contains(PN->getParent())) { -            BasicBlock *NewBB = PN->getIncomingBlock(i); +          if (L->contains(PHIPred) && !L->contains(PN->getParent()))              NewBB->moveBefore(PN->getParent()); -          }            // Splitting the edge can reduce the number of PHI entries we have.            e = PN->getNumIncomingValues(); +          PHIPred = NewBB; +          i = PN->getBasicBlockIndex(PHIPred);          }        } -      Value *&Code = InsertedCode[PN->getIncomingBlock(i)]; +      Value *&Code = InsertedCode[PHIPred];        if (!Code) {          // Insert the code into the end of the predecessor block.          Instruction *InsertPt = (L->contains(OldLoc->getParent())) ? 
-                                PN->getIncomingBlock(i)->getTerminator() : +                                PHIPred->getTerminator() :                                  OldLoc->getParent()->getTerminator();          Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),                                             Rewriter, InsertPt, L, LI); -        DOUT << "      Changing PHI use to "; -        DEBUG(WriteAsOperand(*DOUT, Code, /*PrintType=*/false)); -        DOUT << ", which has value " << *NewBase << " plus IMM " << *Imm << "\n"; +        DEBUG(errs() << "      Changing PHI use to "); +        DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false)); +        DEBUG(errs() << ", which has value " << *NewBase << " plus IMM " +                     << *Imm << "\n");        }        // Replace the use of the operand Value with the new Phi we just created. @@ -535,7 +537,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV* const &NewBase,  /// fitsInAddressMode - Return true if V can be subsumed within an addressing  /// mode, and does not need to be put in a register first. -static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy, +static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy,                               const TargetLowering *TLI, bool HasBaseReg) {    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) {      int64_t VC = SC->getValue()->getSExtValue(); @@ -567,12 +569,12 @@ static bool fitsInAddressMode(const SCEV* const &V, const Type *AccessTy,  /// MoveLoopVariantsToImmediateField - Move any subexpressions from Val that are  /// loop varying to the Imm operand. -static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm, +static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,                                               Loop *L, ScalarEvolution *SE) {    if (Val->isLoopInvariant(L)) return;  // Nothing to do.    
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { -    SmallVector<const SCEV*, 4> NewOps; +    SmallVector<const SCEV *, 4> NewOps;      NewOps.reserve(SAE->getNumOperands());      for (unsigned i = 0; i != SAE->getNumOperands(); ++i) @@ -590,10 +592,10 @@ static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm,        Val = SE->getAddExpr(NewOps);    } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {      // Try to pull immediates out of the start value of nested addrec's. -    const SCEV* Start = SARE->getStart(); +    const SCEV *Start = SARE->getStart();      MoveLoopVariantsToImmediateField(Start, Imm, L, SE); -    SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end()); +    SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());      Ops[0] = Start;      Val = SE->getAddRecExpr(Ops, SARE->getLoop());    } else { @@ -609,15 +611,15 @@ static void MoveLoopVariantsToImmediateField(const SCEV* &Val, const SCEV* &Imm,  /// Accumulate these immediate values into the Imm value.  
static void MoveImmediateValues(const TargetLowering *TLI,                                  const Type *AccessTy, -                                const SCEV* &Val, const SCEV* &Imm, +                                const SCEV *&Val, const SCEV *&Imm,                                  bool isAddress, Loop *L,                                  ScalarEvolution *SE) {    if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) { -    SmallVector<const SCEV*, 4> NewOps; +    SmallVector<const SCEV *, 4> NewOps;      NewOps.reserve(SAE->getNumOperands());      for (unsigned i = 0; i != SAE->getNumOperands(); ++i) { -      const SCEV* NewOp = SAE->getOperand(i); +      const SCEV *NewOp = SAE->getOperand(i);        MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);        if (!NewOp->isLoopInvariant(L)) { @@ -636,11 +638,11 @@ static void MoveImmediateValues(const TargetLowering *TLI,      return;    } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Val)) {      // Try to pull immediates out of the start value of nested addrec's. 
-    const SCEV* Start = SARE->getStart(); +    const SCEV *Start = SARE->getStart();      MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);      if (Start != SARE->getStart()) { -      SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end()); +      SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());        Ops[0] = Start;        Val = SE->getAddRecExpr(Ops, SARE->getLoop());      } @@ -651,8 +653,8 @@ static void MoveImmediateValues(const TargetLowering *TLI,          fitsInAddressMode(SME->getOperand(0), AccessTy, TLI, false) &&          SME->getNumOperands() == 2 && SME->isLoopInvariant(L)) { -      const SCEV* SubImm = SE->getIntegerSCEV(0, Val->getType()); -      const SCEV* NewOp = SME->getOperand(1); +      const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType()); +      const SCEV *NewOp = SME->getOperand(1);        MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);        // If we extracted something out of the subexpressions, see if we can  @@ -687,7 +689,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,  static void MoveImmediateValues(const TargetLowering *TLI,                                  Instruction *User, -                                const SCEV* &Val, const SCEV* &Imm, +                                const SCEV *&Val, const SCEV *&Imm,                                  bool isAddress, Loop *L,                                  ScalarEvolution *SE) {    const Type *AccessTy = getAccessType(User); @@ -697,19 +699,19 @@ static void MoveImmediateValues(const TargetLowering *TLI,  /// SeparateSubExprs - Decompose Expr into all of the subexpressions that are  /// added together.  This is used to reassociate common addition subexprs  /// together for maximal sharing when rewriting bases. 
-static void SeparateSubExprs(SmallVector<const SCEV*, 16> &SubExprs, -                             const SCEV* Expr, +static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs, +                             const SCEV *Expr,                               ScalarEvolution *SE) {    if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {      for (unsigned j = 0, e = AE->getNumOperands(); j != e; ++j)        SeparateSubExprs(SubExprs, AE->getOperand(j), SE);    } else if (const SCEVAddRecExpr *SARE = dyn_cast<SCEVAddRecExpr>(Expr)) { -    const SCEV* Zero = SE->getIntegerSCEV(0, Expr->getType()); +    const SCEV *Zero = SE->getIntegerSCEV(0, Expr->getType());      if (SARE->getOperand(0) == Zero) {        SubExprs.push_back(Expr);      } else {        // Compute the addrec with zero as its base. -      SmallVector<const SCEV*, 4> Ops(SARE->op_begin(), SARE->op_end()); +      SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());        Ops[0] = Zero;   // Start with zero base.        SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop())); @@ -733,7 +735,7 @@ struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };  /// not remove anything.  This looks for things like (a+b+c) and  /// (a+c+d) and computes the common (a+c) subexpression.  The common expression  /// is *removed* from the Bases and returned. -static const SCEV*  +static const SCEV *  RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,                                      ScalarEvolution *SE, Loop *L,                                      const TargetLowering *TLI) { @@ -741,9 +743,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,    // Only one use?  This is a very common case, so we handle it specially and    // cheaply. 
-  const SCEV* Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); -  const SCEV* Result = Zero; -  const SCEV* FreeResult = Zero; +  const SCEV *Zero = SE->getIntegerSCEV(0, Uses[0].Base->getType()); +  const SCEV *Result = Zero; +  const SCEV *FreeResult = Zero;    if (NumUses == 1) {      // If the use is inside the loop, use its base, regardless of what it is:      // it is clearly shared across all the IV's.  If the use is outside the loop @@ -759,13 +761,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,    // Also track whether all uses of each expression can be moved into an    // an addressing mode "for free"; such expressions are left within the loop.    // struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; }; -  std::map<const SCEV*, SubExprUseData> SubExpressionUseData; +  std::map<const SCEV *, SubExprUseData> SubExpressionUseData;    // UniqueSubExprs - Keep track of all of the subexpressions we see in the    // order we see them. -  SmallVector<const SCEV*, 16> UniqueSubExprs; +  SmallVector<const SCEV *, 16> UniqueSubExprs; -  SmallVector<const SCEV*, 16> SubExprs; +  SmallVector<const SCEV *, 16> SubExprs;    unsigned NumUsesInsideLoop = 0;    for (unsigned i = 0; i != NumUses; ++i) {      // If the user is outside the loop, just ignore it for base computation. @@ -809,7 +811,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,    // Now that we know how many times each is used, build Result.  Iterate over    // UniqueSubexprs so that we have a stable ordering.    for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) { -    std::map<const SCEV*, SubExprUseData>::iterator I =  +    std::map<const SCEV *, SubExprUseData>::iterator I =          SubExpressionUseData.find(UniqueSubExprs[i]);      assert(I != SubExpressionUseData.end() && "Entry not found?");      if (I->second.Count == NumUsesInsideLoop) { // Found CSE!  
@@ -853,7 +855,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,    if (FreeResult != Zero) {      SeparateSubExprs(SubExprs, FreeResult, SE);      for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) { -      std::map<const SCEV*, SubExprUseData>::iterator I =  +      std::map<const SCEV *, SubExprUseData>::iterator I =            SubExpressionUseData.find(SubExprs[j]);        SubExpressionUseData.erase(I);      } @@ -902,7 +904,8 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,    for (unsigned i = 0, e = UsersToProcess.size(); i!=e; ++i) {      // If this is a load or other access, pass the type of the access in. -    const Type *AccessTy = Type::VoidTy; +    const Type *AccessTy = +        Type::getVoidTy(UsersToProcess[i].Inst->getContext());      if (isAddressUse(UsersToProcess[i].Inst,                       UsersToProcess[i].OperandValToReplace))        AccessTy = getAccessType(UsersToProcess[i].Inst); @@ -934,7 +937,8 @@ bool LoopStrengthReduce::ValidOffset(bool HasBaseReg,    for (unsigned i=0, e = UsersToProcess.size(); i!=e; ++i) {      // If this is a load or other access, pass the type of the access in. -    const Type *AccessTy = Type::VoidTy; +    const Type *AccessTy = +        Type::getVoidTy(UsersToProcess[i].Inst->getContext());      if (isAddressUse(UsersToProcess[i].Inst,                       UsersToProcess[i].OperandValToReplace))        AccessTy = getAccessType(UsersToProcess[i].Inst); @@ -982,10 +986,10 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,  /// be folded into the addressing mode, nor even that the factor be constant;   /// a multiply (executed once) outside the loop is better than another IV   /// within.  Well, usually. 
-const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg, +const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,                                  bool AllUsesAreAddresses,                                  bool AllUsesAreOutsideLoop, -                                const SCEV* const &Stride,  +                                const SCEV *const &Stride,                                   IVExpr &IV, const Type *Ty,                                  const std::vector<BasedUser>& UsersToProcess) {    if (StrideNoReuse.count(Stride)) @@ -995,7 +999,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,      int64_t SInt = SC->getValue()->getSExtValue();      for (unsigned NewStride = 0, e = IU->StrideOrder.size();           NewStride != e; ++NewStride) { -      std::map<const SCEV*, IVsOfOneStride>::iterator SI =  +      std::map<const SCEV *, IVsOfOneStride>::iterator SI =                   IVsByStride.find(IU->StrideOrder[NewStride]);        if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||            StrideNoReuse.count(SI->first)) @@ -1048,7 +1052,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,      // an existing IV if we can.      for (unsigned NewStride = 0, e = IU->StrideOrder.size();           NewStride != e; ++NewStride) { -      std::map<const SCEV*, IVsOfOneStride>::iterator SI =  +      std::map<const SCEV *, IVsOfOneStride>::iterator SI =                   IVsByStride.find(IU->StrideOrder[NewStride]);        if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))          continue; @@ -1068,7 +1072,7 @@ const SCEV* LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,      // -1*old.      
for (unsigned NewStride = 0, e = IU->StrideOrder.size();           NewStride != e; ++NewStride) { -      std::map<const SCEV*, IVsOfOneStride>::iterator SI =  +      std::map<const SCEV *, IVsOfOneStride>::iterator SI =                   IVsByStride.find(IU->StrideOrder[NewStride]);        if (SI == IVsByStride.end())           continue; @@ -1097,7 +1101,7 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {  /// isNonConstantNegative - Return true if the specified scev is negated, but  /// not a constant. -static bool isNonConstantNegative(const SCEV* const &Expr) { +static bool isNonConstantNegative(const SCEV *const &Expr) {    const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);    if (!Mul) return false; @@ -1114,7 +1118,7 @@ static bool isNonConstantNegative(const SCEV* const &Expr) {  /// of the strided accesses, as well as the old information from Uses. We  /// progressively move information from the Base field to the Imm field, until  /// we eventually have the full access expression to rewrite the use. -const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride, +const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,                                                IVUsersOfOneStride &Uses,                                                Loop *L,                                                bool &AllUsesAreAddresses, @@ -1145,7 +1149,7 @@ const SCEV* LoopStrengthReduce::CollectIVUsers(const SCEV* const &Stride,    // for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find    // "A+B"), emit it to the preheader, then remove the expression from the    // UsersToProcess base values. 
-  const SCEV* CommonExprs = +  const SCEV *CommonExprs =      RemoveCommonExpressionsFromUseBases(UsersToProcess, SE, L, TLI);    // Next, figure out what we can represent in the immediate fields of @@ -1211,7 +1215,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(                                     const std::vector<BasedUser> &UsersToProcess,                                     const Loop *L,                                     bool AllUsesAreAddresses, -                                   const SCEV* Stride) { +                                   const SCEV *Stride) {    if (!EnableFullLSRMode)      return false; @@ -1248,7 +1252,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(          if (!Imm)       Imm = SE->getIntegerSCEV(0, Stride->getType());          const Instruction *Inst = UsersToProcess[i].Inst;          const Type *AccessTy = getAccessType(Inst); -        const SCEV* Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm); +        const SCEV *Diff = SE->getMinusSCEV(UsersToProcess[i].Imm, Imm);          if (!Diff->isZero() &&              (!AllUsesAreAddresses ||               !fitsInAddressMode(Diff, AccessTy, TLI, /*HasBaseReg=*/true))) @@ -1282,7 +1286,7 @@ bool LoopStrengthReduce::ShouldUseFullStrengthReductionMode(  ///  /// Return the created phi node.  /// -static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step, +static PHINode *InsertAffinePhi(const SCEV *Start, const SCEV *Step,                                  Instruction *IVIncInsertPt,                                  const Loop *L,                                  SCEVExpander &Rewriter) { @@ -1302,7 +1306,7 @@ static PHINode *InsertAffinePhi(const SCEV* Start, const SCEV* Step,    // If the stride is negative, insert a sub instead of an add for the    // increment.    
bool isNegative = isNonConstantNegative(Step); -  const SCEV* IncAmount = Step; +  const SCEV *IncAmount = Step;    if (isNegative)      IncAmount = Rewriter.SE.getNegativeSCEV(Step); @@ -1341,13 +1345,13 @@ static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {    // loop before users outside of the loop with a particular base.    //    // We would like to use stable_sort here, but we can't.  The problem is that -  // const SCEV*'s don't have a deterministic ordering w.r.t to each other, so +  // const SCEV *'s don't have a deterministic ordering w.r.t to each other, so    // we don't have anything to do a '<' comparison on.  Because we think the    // number of uses is small, do a horrible bubble sort which just relies on    // ==.    for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) {      // Get a base value. -    const SCEV* Base = UsersToProcess[i].Base; +    const SCEV *Base = UsersToProcess[i].Base;      // Compact everything with this base to be consecutive with this one.      for (unsigned j = i+1; j != e; ++j) { @@ -1366,11 +1370,11 @@ static void SortUsersToProcess(std::vector<BasedUser> &UsersToProcess) {  void  LoopStrengthReduce::PrepareToStrengthReduceFully(                                          std::vector<BasedUser> &UsersToProcess, -                                        const SCEV* Stride, -                                        const SCEV* CommonExprs, +                                        const SCEV *Stride, +                                        const SCEV *CommonExprs,                                          const Loop *L,                                          SCEVExpander &PreheaderRewriter) { -  DOUT << "  Fully reducing all users\n"; +  DEBUG(errs() << "  Fully reducing all users\n");    // Rewrite the UsersToProcess records, creating a separate PHI for each    // unique Base value. 
@@ -1379,9 +1383,9 @@ LoopStrengthReduce::PrepareToStrengthReduceFully(      // TODO: The uses are grouped by base, but not sorted. We arbitrarily      // pick the first Imm value here to start with, and adjust it for the      // other uses. -    const SCEV* Imm = UsersToProcess[i].Imm; -    const SCEV* Base = UsersToProcess[i].Base; -    const SCEV* Start = SE->getAddExpr(CommonExprs, Base, Imm); +    const SCEV *Imm = UsersToProcess[i].Imm; +    const SCEV *Base = UsersToProcess[i].Base; +    const SCEV *Start = SE->getAddExpr(CommonExprs, Base, Imm);      PHINode *Phi = InsertAffinePhi(Start, Stride, IVIncInsertPt, L,                                     PreheaderRewriter);      // Loop over all the users with the same base. @@ -1413,13 +1417,13 @@ static Instruction *FindIVIncInsertPt(std::vector<BasedUser> &UsersToProcess,  void  LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(                                           std::vector<BasedUser> &UsersToProcess, -                                         const SCEV* Stride, -                                         const SCEV* CommonExprs, +                                         const SCEV *Stride, +                                         const SCEV *CommonExprs,                                           Value *CommonBaseV,                                           Instruction *IVIncInsertPt,                                           const Loop *L,                                           SCEVExpander &PreheaderRewriter) { -  DOUT << "  Inserting new PHI:\n"; +  DEBUG(errs() << "  Inserting new PHI:\n");    PHINode *Phi = InsertAffinePhi(SE->getUnknown(CommonBaseV),                                   Stride, IVIncInsertPt, L, @@ -1432,9 +1436,9 @@ LoopStrengthReduce::PrepareToStrengthReduceWithNewPhi(    for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)      UsersToProcess[i].Phi = Phi; -  DOUT << "    IV="; -  DEBUG(WriteAsOperand(*DOUT, Phi, /*PrintType=*/false)); -  DOUT << "\n"; +  
DEBUG(errs() << "    IV="); +  DEBUG(WriteAsOperand(errs(), Phi, /*PrintType=*/false)); +  DEBUG(errs() << "\n");  }  /// PrepareToStrengthReduceFromSmallerStride - Prepare for the given users to @@ -1447,8 +1451,8 @@ LoopStrengthReduce::PrepareToStrengthReduceFromSmallerStride(                                           Value *CommonBaseV,                                           const IVExpr &ReuseIV,                                           Instruction *PreInsertPt) { -  DOUT << "  Rewriting in terms of existing IV of STRIDE " << *ReuseIV.Stride -       << " and BASE " << *ReuseIV.Base << "\n"; +  DEBUG(errs() << "  Rewriting in terms of existing IV of STRIDE " +               << *ReuseIV.Stride << " and BASE " << *ReuseIV.Base << "\n");    // All the users will share the reused IV.    for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i) @@ -1490,7 +1494,7 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,  /// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single  /// stride of IV.  All of the users may have different starting values, and this  /// may not be the only stride. -void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride, +void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,                                                        IVUsersOfOneStride &Uses,                                                        Loop *L) {    // If all the users are moved to another stride, then there is nothing to do. @@ -1513,7 +1517,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,    // move information from the Base field to the Imm field, until we eventually    // have the full access expression to rewrite the use.    
std::vector<BasedUser> UsersToProcess; -  const SCEV* CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses, +  const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,                                            AllUsesAreOutsideLoop,                                            UsersToProcess); @@ -1531,9 +1535,11 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,    // If all uses are addresses, consider sinking the immediate part of the    // common expression back into uses if they can fit in the immediate fields.    if (TLI && HaveCommonExprs && AllUsesAreAddresses) { -    const SCEV* NewCommon = CommonExprs; -    const SCEV* Imm = SE->getIntegerSCEV(0, ReplacedTy); -    MoveImmediateValues(TLI, Type::VoidTy, NewCommon, Imm, true, L, SE); +    const SCEV *NewCommon = CommonExprs; +    const SCEV *Imm = SE->getIntegerSCEV(0, ReplacedTy); +    MoveImmediateValues(TLI, Type::getVoidTy( +                        L->getLoopPreheader()->getContext()), +                        NewCommon, Imm, true, L, SE);      if (!Imm->isZero()) {        bool DoSink = true; @@ -1548,11 +1554,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,        if (GV || Offset)          // Pass VoidTy as the AccessTy to be conservative, because          // there could be multiple access types among all the uses. 
-        DoSink = IsImmFoldedIntoAddrMode(GV, Offset, Type::VoidTy, +        DoSink = IsImmFoldedIntoAddrMode(GV, Offset, +                          Type::getVoidTy(L->getLoopPreheader()->getContext()),                                           UsersToProcess, TLI);        if (DoSink) { -        DOUT << "  Sinking " << *Imm << " back down into uses\n"; +        DEBUG(errs() << "  Sinking " << *Imm << " back down into uses\n");          for (unsigned i = 0, e = UsersToProcess.size(); i != e; ++i)            UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm, Imm);          CommonExprs = NewCommon; @@ -1564,9 +1571,9 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,    // Now that we know what we need to do, insert the PHI node itself.    // -  DOUT << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " -       << *Stride << ":\n" -       << "  Common base: " << *CommonExprs << "\n"; +  DEBUG(errs() << "LSR: Examining IVs of TYPE " << *ReplacedTy << " of STRIDE " +               << *Stride << ":\n" +               << "  Common base: " << *CommonExprs << "\n");    SCEVExpander Rewriter(*SE);    SCEVExpander PreheaderRewriter(*SE); @@ -1576,11 +1583,13 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,    BasicBlock *LatchBlock = L->getLoopLatch();    Instruction *IVIncInsertPt = LatchBlock->getTerminator(); -  Value *CommonBaseV = Context->getNullValue(ReplacedTy); +  Value *CommonBaseV = Constant::getNullValue(ReplacedTy); -  const SCEV* RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); -  IVExpr   ReuseIV(SE->getIntegerSCEV(0, Type::Int32Ty), -                   SE->getIntegerSCEV(0, Type::Int32Ty), +  const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy); +  IVExpr   ReuseIV(SE->getIntegerSCEV(0, +                                    Type::getInt32Ty(Preheader->getContext())), +                   SE->getIntegerSCEV(0,  +                                    
Type::getInt32Ty(Preheader->getContext())),                     0);    /// Choose a strength-reduction strategy and prepare for it by creating @@ -1618,7 +1627,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,    // strength-reduced forms.  This outer loop handles all bases, the inner    // loop handles all users of a particular base.    while (!UsersToProcess.empty()) { -    const SCEV* Base = UsersToProcess.back().Base; +    const SCEV *Base = UsersToProcess.back().Base;      Instruction *Inst = UsersToProcess.back().Inst;      // Emit the code for Base into the preheader. @@ -1626,17 +1635,17 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,      if (!Base->isZero()) {        BaseV = PreheaderRewriter.expandCodeFor(Base, 0, PreInsertPt); -      DOUT << "  INSERTING code for BASE = " << *Base << ":"; +      DEBUG(errs() << "  INSERTING code for BASE = " << *Base << ":");        if (BaseV->hasName()) -        DOUT << " Result value name = %" << BaseV->getNameStr(); -      DOUT << "\n"; +        DEBUG(errs() << " Result value name = %" << BaseV->getName()); +      DEBUG(errs() << "\n");        // If BaseV is a non-zero constant, make sure that it gets inserted into        // the preheader, instead of being forward substituted into the uses.  We        // do this by forcing a BitCast (noop cast) to be inserted into the        // preheader in this case.        if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) && -          !isa<Instruction>(BaseV)) { +          isa<Constant>(BaseV)) {          // We want this constant emitted into the preheader! This is just          // using cast as a copy so BitCast (no-op cast) is appropriate          BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert", @@ -1650,15 +1659,15 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,        // FIXME: Use emitted users to emit other users.        
BasedUser &User = UsersToProcess.back(); -      DOUT << "    Examining "; +      DEBUG(errs() << "    Examining ");        if (User.isUseOfPostIncrementedValue) -        DOUT << "postinc"; +        DEBUG(errs() << "postinc");        else -        DOUT << "preinc"; -      DOUT << " use "; -      DEBUG(WriteAsOperand(*DOUT, UsersToProcess.back().OperandValToReplace, +        DEBUG(errs() << "preinc"); +      DEBUG(errs() << " use "); +      DEBUG(WriteAsOperand(errs(), UsersToProcess.back().OperandValToReplace,                             /*PrintType=*/false)); -      DOUT << " in Inst: " << *(User.Inst); +      DEBUG(errs() << " in Inst: " << *User.Inst);        // If this instruction wants to use the post-incremented value, move it        // after the post-inc and use its value instead of the PHI. @@ -1673,7 +1682,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,            User.Inst->moveBefore(IVIncInsertPt);        } -      const SCEV* RewriteExpr = SE->getUnknown(RewriteOp); +      const SCEV *RewriteExpr = SE->getUnknown(RewriteOp);        if (SE->getEffectiveSCEVType(RewriteOp->getType()) !=            SE->getEffectiveSCEVType(ReplacedTy)) { @@ -1705,7 +1714,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,          // The base has been used to initialize the PHI node but we don't want          // it here.          if (!ReuseIV.Base->isZero()) { -          const SCEV* typedBase = ReuseIV.Base; +          const SCEV *typedBase = ReuseIV.Base;            if (SE->getEffectiveSCEVType(RewriteExpr->getType()) !=                SE->getEffectiveSCEVType(ReuseIV.Base->getType())) {              // It's possible the original IV is a larger type than the new IV, @@ -1770,10 +1779,10 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,  /// set the IV user and stride information and return true, otherwise return  /// false.  
bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse, -                                       const SCEV* const * &CondStride) { +                                       const SCEV *const * &CondStride) {    for (unsigned Stride = 0, e = IU->StrideOrder.size();         Stride != e && !CondUse; ++Stride) { -    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = +    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =        IU->IVUsesByStride.find(IU->StrideOrder[Stride]);      assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!"); @@ -1800,7 +1809,7 @@ namespace {      const ScalarEvolution *SE;      explicit StrideCompare(const ScalarEvolution *se) : SE(se) {} -    bool operator()(const SCEV* const &LHS, const SCEV* const &RHS) { +    bool operator()(const SCEV *const &LHS, const SCEV *const &RHS) {        const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS);        const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);        if (LHSC && RHSC) { @@ -1843,14 +1852,14 @@ namespace {  /// if (v1 < 30) goto loop  ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,                                                  IVStrideUse* &CondUse, -                                              const SCEV* const* &CondStride) { +                                              const SCEV *const* &CondStride) {    // If there's only one stride in the loop, there's nothing to do here.    if (IU->StrideOrder.size() < 2)      return Cond;    // If there are other users of the condition's stride, don't bother    // trying to change the condition because the stride will still    // remain. 
-  std::map<const SCEV*, IVUsersOfOneStride *>::iterator I = +  std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =      IU->IVUsesByStride.find(*CondStride);    if (I == IU->IVUsesByStride.end() ||        I->second->Users.size() != 1) @@ -1867,11 +1876,11 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,    const Type *NewCmpTy = NULL;    unsigned TyBits = SE->getTypeSizeInBits(CmpTy);    unsigned NewTyBits = 0; -  const SCEV* *NewStride = NULL; +  const SCEV **NewStride = NULL;    Value *NewCmpLHS = NULL;    Value *NewCmpRHS = NULL;    int64_t Scale = 1; -  const SCEV* NewOffset = SE->getIntegerSCEV(0, CmpTy); +  const SCEV *NewOffset = SE->getIntegerSCEV(0, CmpTy);    if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {      int64_t CmpVal = C->getValue().getSExtValue(); @@ -1883,7 +1892,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,      // Look for a suitable stride / iv as replacement.      for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) { -      std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = +      std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =          IU->IVUsesByStride.find(IU->StrideOrder[i]);        if (!isa<SCEVConstant>(SI->first))          continue; @@ -1942,7 +1951,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,        NewCmpTy = NewCmpLHS->getType();        NewTyBits = SE->getTypeSizeInBits(NewCmpTy); -      const Type *NewCmpIntTy = Context->getIntegerType(NewTyBits); +      const Type *NewCmpIntTy = IntegerType::get(Cond->getContext(), NewTyBits);        if (RequiresTypeConversion(NewCmpTy, CmpTy)) {          // Check if it is possible to rewrite it using          // an iv / stride of a smaller integer type. 
@@ -1963,7 +1972,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,        bool AllUsesAreAddresses = true;        bool AllUsesAreOutsideLoop = true;        std::vector<BasedUser> UsersToProcess; -      const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L, +      const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,                                                AllUsesAreAddresses,                                                AllUsesAreOutsideLoop,                                                UsersToProcess); @@ -1987,10 +1996,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,        NewStride = &IU->StrideOrder[i];        if (!isa<PointerType>(NewCmpTy)) -        NewCmpRHS = Context->getConstantInt(NewCmpTy, NewCmpVal); +        NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);        else { -        Constant *CI = Context->getConstantInt(NewCmpIntTy, NewCmpVal); -        NewCmpRHS = Context->getConstantExprIntToPtr(CI, NewCmpTy); +        Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal); +        NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);        }        NewOffset = TyBits == NewTyBits          ? SE->getMulExpr(CondUse->getOffset(), @@ -2019,9 +2028,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,      // Create a new compare instruction using new stride / iv.      ICmpInst *OldCond = Cond;      // Insert new compare instruction. -    Cond = new ICmpInst(Predicate, NewCmpLHS, NewCmpRHS, -                        L->getHeader()->getName() + ".termcond", -                        OldCond); +    Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS, +                        L->getHeader()->getName() + ".termcond");      // Remove the old compare instruction. The old indvar is probably dead too.      
DeadInsts.push_back(CondUse->getOperandValToReplace()); @@ -2098,13 +2106,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,    SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));    if (!Sel || !Sel->hasOneUse()) return Cond; -  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); +  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);    if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))      return Cond; -  const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); +  const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());    // Add one to the backedge-taken count to get the trip count. -  const SCEV* IterationCount = SE->getAddExpr(BackedgeTakenCount, One); +  const SCEV *IterationCount = SE->getAddExpr(BackedgeTakenCount, One);    // Check for a max calculation that matches the pattern.    if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount)) @@ -2117,13 +2125,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,    if (Max->getNumOperands() != 2)      return Cond; -  const SCEV* MaxLHS = Max->getOperand(0); -  const SCEV* MaxRHS = Max->getOperand(1); +  const SCEV *MaxLHS = Max->getOperand(0); +  const SCEV *MaxRHS = Max->getOperand(1);    if (!MaxLHS || MaxLHS != One) return Cond;    // Check the relevant induction variable for conformance to    // the pattern. -  const SCEV* IV = SE->getSCEV(Cond->getOperand(0)); +  const SCEV *IV = SE->getSCEV(Cond->getOperand(0));    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);    if (!AR || !AR->isAffine() ||        AR->getStart() != One || @@ -2152,7 +2160,7 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,    // Ok, everything looks ok to change the condition into an SLT or SGE and    // delete the max calculation.    
ICmpInst *NewCond = -    new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond); +    new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp");    // Delete the max calculation instructions.    Cond->replaceAllUsesWith(NewCond); @@ -2169,13 +2177,13 @@ ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,  /// inside the loop then try to eliminate the cast opeation.  void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { -  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); +  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);    if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))      return; - +        for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;         ++Stride) { -    std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = +    std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =        IU->IVUsesByStride.find(IU->StrideOrder[Stride]);      assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");      if (!isa<SCEVConstant>(SI->first)) @@ -2209,7 +2217,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {        if (TLI) {          // If target does not support DestTy natively then do not apply          // this transformation. -        MVT DVT = TLI->getValueType(DestTy); +        EVT DVT = TLI->getValueType(DestTy);          if (!TLI->isTypeLegal(DVT)) continue;        } @@ -2234,7 +2242,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {        ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));        if (!Init) continue; -      Constant *NewInit = Context->getConstantFP(DestTy, Init->getZExtValue()); +      Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());        BinaryOperator *Incr =           dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); @@ -2258,7 +2266,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {        PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);        /* create new increment. 
'++d' in above example. */ -      Constant *CFP = Context->getConstantFP(DestTy, C->getZExtValue()); +      Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());        BinaryOperator *NewIncr =           BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?                                   Instruction::FAdd : Instruction::FSub, @@ -2294,6 +2302,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {    // one register value.    BasicBlock *LatchBlock = L->getLoopLatch();    BasicBlock *ExitingBlock = L->getExitingBlock(); +      if (!ExitingBlock)      // Multiple exits, just look at the exit in the latch block if there is one.      ExitingBlock = LatchBlock; @@ -2305,7 +2314,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {    // Search IVUsesByStride to find Cond's IVUse if there is one.    IVStrideUse *CondUse = 0; -  const SCEV* const *CondStride = 0; +  const SCEV *const *CondStride = 0;    ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());    if (!FindIVUserForCond(Cond, CondUse, CondStride))      return; // setcc doesn't use the IV. 
@@ -2335,7 +2344,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {        int64_t SInt = SC->getValue()->getSExtValue();        for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;             ++NewStride) { -        std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = +        std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =            IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);          if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)            continue; @@ -2349,7 +2358,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {          bool AllUsesAreAddresses = true;          bool AllUsesAreOutsideLoop = true;          std::vector<BasedUser> UsersToProcess; -        const SCEV* CommonExprs = CollectIVUsers(SI->first, *SI->second, L, +        const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,                                                  AllUsesAreAddresses,                                                  AllUsesAreOutsideLoop,                                                  UsersToProcess); @@ -2410,7 +2419,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {  void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {    // If the number of times the loop is executed isn't computable, give up. -  const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L); +  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);    if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))      return; @@ -2439,9 +2448,9 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {    // Handle only tests for equality for the moment, and only stride 1.    
if (Cond->getPredicate() != CmpInst::ICMP_EQ)      return; -  const SCEV* IV = SE->getSCEV(Cond->getOperand(0)); +  const SCEV *IV = SE->getSCEV(Cond->getOperand(0));    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); -  const SCEV* One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType()); +  const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());    if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)      return;    // If the RHS of the comparison is defined inside the loop, the rewrite @@ -2497,7 +2506,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {    Value *startVal = phi->getIncomingValue(inBlock);    Value *endVal = Cond->getOperand(1);    // FIXME check for case where both are constant -  Constant* Zero = Context->getConstantInt(Cond->getOperand(1)->getType(), 0); +  Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);    BinaryOperator *NewStartVal =       BinaryOperator::Create(Instruction::Sub, endVal, startVal,                             "tmp", PreInsertPt); @@ -2516,11 +2525,9 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {    Changed = false;    if (!IU->IVUsesByStride.empty()) { -#ifndef NDEBUG -    DOUT << "\nLSR on \"" << L->getHeader()->getParent()->getNameStart() -         << "\" "; -    DEBUG(L->dump()); -#endif +    DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName() +          << "\" "; +          L->dump());      // Sort the StrideOrder so we process larger strides first.      std::stable_sort(IU->StrideOrder.begin(), IU->StrideOrder.end(), @@ -2557,7 +2564,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {      // strides deterministic - not dependent on map order.      
for (unsigned Stride = 0, e = IU->StrideOrder.size();           Stride != e; ++Stride) { -      std::map<const SCEV*, IVUsersOfOneStride *>::iterator SI = +      std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =          IU->IVUsesByStride.find(IU->StrideOrder[Stride]);        assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");        // FIXME: Generalize to non-affine IV's. diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp index 23757cdb2d29..837ec59dbbce 100644 --- a/lib/Transforms/Scalar/LoopUnroll.cpp +++ b/lib/Transforms/Scalar/LoopUnroll.cpp @@ -17,9 +17,9 @@  #include "llvm/Transforms/Scalar.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/LoopPass.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/UnrollLoop.h"  #include <climits> @@ -39,7 +39,7 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,             "-unroll-threshold loop size is reached."));  namespace { -  class VISIBILITY_HIDDEN LoopUnroll : public LoopPass { +  class LoopUnroll : public LoopPass {    public:      static char ID; // Pass ID, replacement for typeid      LoopUnroll() : LoopPass(&ID) {} @@ -96,10 +96,7 @@ static unsigned ApproximateLoopSize(const Loop *L) {          // is higher than other instructions. Here 3 and 10 are magic          // numbers that help one isolated test case from PR2067 without          // negatively impacting measured benchmarks. -        if (isa<IntrinsicInst>(I)) -          Size = Size + 3; -        else -          Size = Size + 10; +        Size += isa<IntrinsicInst>(I) ? 
3 : 10;        } else {          ++Size;        } @@ -118,51 +115,48 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {    LoopInfo *LI = &getAnalysis<LoopInfo>();    BasicBlock *Header = L->getHeader(); -  DOUT << "Loop Unroll: F[" << Header->getParent()->getName() -       << "] Loop %" << Header->getName() << "\n"; +  DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName() +        << "] Loop %" << Header->getName() << "\n"); +  (void)Header;    // Find trip count    unsigned TripCount = L->getSmallConstantTripCount();    unsigned Count = UnrollCount; -  +    // Automatically select an unroll count.    if (Count == 0) {      // Conservative heuristic: if we know the trip count, see if we can      // completely unroll (subject to the threshold, checked below); otherwise -    // try to find greatest modulo of the trip count which is still under  +    // try to find greatest modulo of the trip count which is still under      // threshold value. -    if (TripCount != 0) { -      Count = TripCount; -    } else { +    if (TripCount == 0)        return false; -    } +    Count = TripCount;    }    // Enforce the threshold.    
if (UnrollThreshold != NoThreshold) {      unsigned LoopSize = ApproximateLoopSize(L); -    DOUT << "  Loop Size = " << LoopSize << "\n"; +    DEBUG(errs() << "  Loop Size = " << LoopSize << "\n");      uint64_t Size = (uint64_t)LoopSize*Count;      if (TripCount != 1 && Size > UnrollThreshold) { -      DOUT << "  Too large to fully unroll with count: " << Count -           << " because size: " << Size << ">" << UnrollThreshold << "\n"; -      if (UnrollAllowPartial) { -        // Reduce unroll count to be modulo of TripCount for partial unrolling -        Count = UnrollThreshold / LoopSize;         -        while (Count != 0 && TripCount%Count != 0) { -          Count--; -        }         -        if (Count < 2) { -          DOUT << "  could not unroll partially\n"; -          return false; -        } else { -          DOUT << "  partially unrolling with count: " << Count << "\n"; -        } -      } else { -        DOUT << "  will not try to unroll partially because " -             << "-unroll-allow-partial not given\n"; +      DEBUG(errs() << "  Too large to fully unroll with count: " << Count +            << " because size: " << Size << ">" << UnrollThreshold << "\n"); +      if (!UnrollAllowPartial) { +        DEBUG(errs() << "  will not try to unroll partially because " +              << "-unroll-allow-partial not given\n"); +        return false; +      } +      // Reduce unroll count to be modulo of TripCount for partial unrolling +      Count = UnrollThreshold / LoopSize; +      while (Count != 0 && TripCount%Count != 0) { +        Count--; +      } +      if (Count < 2) { +        DEBUG(errs() << "  could not unroll partially\n");          return false;        } +      DEBUG(errs() << "  partially unrolling with count: " << Count << "\n");      }    } diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index de5eedf1e84c..f6de36292603 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ 
b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -34,6 +34,7 @@  #include "llvm/Instructions.h"  #include "llvm/LLVMContext.h"  #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InlineCost.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/LoopPass.h"  #include "llvm/Analysis/Dominators.h" @@ -44,8 +45,8 @@  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include <algorithm>  #include <set>  using namespace llvm; @@ -56,12 +57,14 @@ STATISTIC(NumSelects , "Number of selects unswitched");  STATISTIC(NumTrivial , "Number of unswitches that are trivial");  STATISTIC(NumSimplify, "Number of simplifications of unswitched code"); +// The specific value of 50 here was chosen based only on intuition and a +// few specific examples.  static cl::opt<unsigned>  Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), -          cl::init(10), cl::Hidden); +          cl::init(50), cl::Hidden);  namespace { -  class VISIBILITY_HIDDEN LoopUnswitch : public LoopPass { +  class LoopUnswitch : public LoopPass {      LoopInfo *LI;  // Loop information      LPPassManager *LPM; @@ -112,6 +115,10 @@ namespace {    private: +    virtual void releaseMemory() { +      UnswitchedVals.clear(); +    } +      /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,      /// remove it.      void RemoveLoopFromWorklist(Loop *L) { @@ -168,8 +175,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {    if (isa<Constant>(Cond)) return 0;    // TODO: Handle: br (VARIANT|INVARIANT). -  // TODO: Hoist simple expressions out of loops. -  if (L->isLoopInvariant(Cond)) return Cond; + +  // Hoist simple values out. 
+  if (L->makeLoopInvariant(Cond, Changed)) +    return Cond;    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond))      if (BO->getOpcode() == Instruction::And || @@ -214,6 +223,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {  /// and profitable.  bool LoopUnswitch::processCurrentLoop() {    bool Changed = false; +  LLVMContext &Context = currentLoop->getHeader()->getContext();    // Loop over all of the basic blocks in the loop.  If we find an interior    // block that is branching on a loop-invariant condition, we can unswitch this @@ -231,7 +241,7 @@ bool LoopUnswitch::processCurrentLoop() {          Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),                                                  currentLoop, Changed);          if (LoopCond && UnswitchIfProfitable(LoopCond,  -                                             Context->getConstantIntTrue())) { +                                             ConstantInt::getTrue(Context))) {            ++NumBranches;            return true;          } @@ -261,7 +271,7 @@ bool LoopUnswitch::processCurrentLoop() {          Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),                                                  currentLoop, Changed);          if (LoopCond && UnswitchIfProfitable(LoopCond,  -                                             Context->getConstantIntTrue())) { +                                             ConstantInt::getTrue(Context))) {            ++NumSelects;            return true;          } @@ -335,6 +345,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,                                         BasicBlock **LoopExit) {    BasicBlock *Header = currentLoop->getHeader();    TerminatorInst *HeaderTerm = Header->getTerminator(); +  LLVMContext &Context = Header->getContext();    BasicBlock *LoopExitBB = 0;    if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) { @@ -349,10 +360,10 @@ bool 
LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,      // this.      if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,                                                BI->getSuccessor(0)))) { -      if (Val) *Val = Context->getConstantIntTrue(); +      if (Val) *Val = ConstantInt::getTrue(Context);      } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,                                                       BI->getSuccessor(1)))) { -      if (Val) *Val = Context->getConstantIntFalse(); +      if (Val) *Val = ConstantInt::getFalse(Context);      }    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {      // If this isn't a switch on Cond, we can't handle it. @@ -398,29 +409,14 @@ unsigned LoopUnswitch::getLoopUnswitchCost(Value *LIC) {    if (IsTrivialUnswitchCondition(LIC))      return 0; -  // FIXME: This is really overly conservative.  However, more liberal  -  // estimations have thus far resulted in excessive unswitching, which is bad -  // both in compile time and in code size.  This should be replaced once -  // someone figures out how a good estimation. -  return currentLoop->getBlocks().size(); -   -  unsigned Cost = 0; -  // FIXME: this is brain dead.  It should take into consideration code -  // shrinkage. +  // FIXME: This is overly conservative because it does not take into +  // consideration code simplification opportunities. +  CodeMetrics Metrics;    for (Loop::block_iterator I = currentLoop->block_begin(),            E = currentLoop->block_end(); -       I != E; ++I) { -    BasicBlock *BB = *I; -    // Do not include empty blocks in the cost calculation.  This happen due to -    // loop canonicalization and will be removed. -    if (BB->begin() == BasicBlock::iterator(BB->getTerminator())) -      continue; -     -    // Count basic blocks. 
-    ++Cost; -  } - -  return Cost; +       I != E; ++I) +    Metrics.analyzeBasicBlock(*I); +  return Metrics.NumInsts;  }  /// UnswitchIfProfitable - We have found that we can unswitch currentLoop when @@ -445,9 +441,9 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){      // FIXME: this should estimate growth by the amount of code shared by the      // resultant unswitched loops.      // -    DOUT << "NOT unswitching loop %" -         << currentLoop->getHeader()->getName() << ", cost too high: " -         << currentLoop->getBlocks().size() << "\n"; +    DEBUG(errs() << "NOT unswitching loop %" +          << currentLoop->getHeader()->getName() << ", cost too high: " +          << currentLoop->getBlocks().size() << "\n");      return false;    } @@ -506,14 +502,20 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,    // Insert a conditional branch on LIC to the two preheaders.  The original    // code is the true version and the new code is the false version.    Value *BranchVal = LIC; -  if (!isa<ConstantInt>(Val) || Val->getType() != Type::Int1Ty) -    BranchVal = new ICmpInst(ICmpInst::ICMP_EQ, LIC, Val, "tmp", InsertPt); -  else if (Val != Context->getConstantIntTrue()) +  if (!isa<ConstantInt>(Val) || +      Val->getType() != Type::getInt1Ty(LIC->getContext())) +    BranchVal = new ICmpInst(InsertPt, ICmpInst::ICMP_EQ, LIC, Val, "tmp"); +  else if (Val != ConstantInt::getTrue(Val->getContext()))      // We want to enter the new loop when the condition is true.      std::swap(TrueDest, FalseDest);    // Insert the new branch. -  BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); +  BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt); + +  // If either edge is critical, split it. This helps preserve LoopSimplify +  // form for enclosing loops. 
+  SplitCriticalEdge(BI, 0, this); +  SplitCriticalEdge(BI, 1, this);  }  /// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable @@ -524,10 +526,10 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,  void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,                                               Constant *Val,                                               BasicBlock *ExitBlock) { -  DOUT << "loop-unswitch: Trivial-Unswitch loop %" -       << loopHeader->getName() << " [" << L->getBlocks().size() -       << " blocks] in Function " << L->getHeader()->getParent()->getName() -       << " on cond: " << *Val << " == " << *Cond << "\n"; +  DEBUG(errs() << "loop-unswitch: Trivial-Unswitch loop %" +        << loopHeader->getName() << " [" << L->getBlocks().size() +        << " blocks] in Function " << L->getHeader()->getParent()->getName() +        << " on cond: " << *Val << " == " << *Cond << "\n");    // First step, split the preheader, so that we know that there is a safe place    // to insert the conditional branch.  
We will change loopPreheader to have a @@ -570,47 +572,11 @@ void LoopUnswitch::SplitExitEdges(Loop *L,    for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {      BasicBlock *ExitBlock = ExitBlocks[i]; -    std::vector<BasicBlock*> Preds(pred_begin(ExitBlock), pred_end(ExitBlock)); - -    for (unsigned j = 0, e = Preds.size(); j != e; ++j) { -      BasicBlock* NewExitBlock = SplitEdge(Preds[j], ExitBlock, this); -      BasicBlock* StartBlock = Preds[j]; -      BasicBlock* EndBlock; -      if (NewExitBlock->getSinglePredecessor() == ExitBlock) { -        EndBlock = NewExitBlock; -        NewExitBlock = EndBlock->getSinglePredecessor(); -      } else { -        EndBlock = ExitBlock; -      } -       -      std::set<PHINode*> InsertedPHIs; -      PHINode* OldLCSSA = 0; -      for (BasicBlock::iterator I = EndBlock->begin(); -           (OldLCSSA = dyn_cast<PHINode>(I)); ++I) { -        Value* OldValue = OldLCSSA->getIncomingValueForBlock(NewExitBlock); -        PHINode* NewLCSSA = PHINode::Create(OldLCSSA->getType(), -                                            OldLCSSA->getName() + ".us-lcssa", -                                            NewExitBlock->getTerminator()); -        NewLCSSA->addIncoming(OldValue, StartBlock); -        OldLCSSA->setIncomingValue(OldLCSSA->getBasicBlockIndex(NewExitBlock), -                                   NewLCSSA); -        InsertedPHIs.insert(NewLCSSA); -      } - -      BasicBlock::iterator InsertPt = EndBlock->getFirstNonPHI(); -      for (BasicBlock::iterator I = NewExitBlock->begin(); -         (OldLCSSA = dyn_cast<PHINode>(I)) && InsertedPHIs.count(OldLCSSA) == 0; -         ++I) { -        PHINode *NewLCSSA = PHINode::Create(OldLCSSA->getType(), -                                            OldLCSSA->getName() + ".us-lcssa", -                                            InsertPt); -        OldLCSSA->replaceAllUsesWith(NewLCSSA); -        NewLCSSA->addIncoming(OldLCSSA, NewExitBlock); -      } - -    }     +    
SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), +                                       pred_end(ExitBlock)); +    SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(), +                           ".us-lcssa", this);    } -  }  /// UnswitchNontrivialCondition - We determined that the loop is profitable  @@ -619,10 +585,10 @@ void LoopUnswitch::SplitExitEdges(Loop *L,  void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,                                                  Loop *L) {    Function *F = loopHeader->getParent(); -  DOUT << "loop-unswitch: Unswitching loop %" -       << loopHeader->getName() << " [" << L->getBlocks().size() -       << " blocks] in Function " << F->getName() -       << " when '" << *Val << "' == " << *LIC << "\n"; +  DEBUG(errs() << "loop-unswitch: Unswitching loop %" +        << loopHeader->getName() << " [" << L->getBlocks().size() +        << " blocks] in Function " << F->getName() +        << " when '" << *Val << "' == " << *LIC << "\n");    LoopBlocks.clear();    NewBlocks.clear(); @@ -745,7 +711,7 @@ static void RemoveFromWorklist(Instruction *I,  static void ReplaceUsesOfWith(Instruction *I, Value *V,                                 std::vector<Instruction*> &Worklist,                                Loop *L, LPPassManager *LPM) { -  DOUT << "Replace with '" << *V << "': " << *I; +  DEBUG(errs() << "Replace with '" << *V << "': " << *I);    // Add uses to the worklist, which may be dead now.    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -788,7 +754,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,            // dominates the latch).            LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);            Pred->getTerminator()->eraseFromParent(); -          new UnreachableInst(Pred); +          new UnreachableInst(BB->getContext(), Pred);            // The loop is now broken, remove it from LI.            
RemoveLoopFromHierarchy(L); @@ -807,7 +773,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,      return;    } -  DOUT << "Nuking dead block: " << *BB; +  DEBUG(errs() << "Nuking dead block: " << *BB);    // Remove the instructions in the basic block from the worklist.    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { @@ -815,8 +781,10 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,      // Anything that uses the instructions in this basic block should have their      // uses replaced with undefs. -    if (!I->use_empty()) -      I->replaceAllUsesWith(Context->getUndef(I->getType())); +    // If I is not void type then replaceAllUsesWith undef. +    // This allows ValueHandlers and custom metadata to adjust itself. +    if (!I->getType()->isVoidTy()) +      I->replaceAllUsesWith(UndefValue::get(I->getType()));    }    // If this is the edge to the header block for a loop, remove the loop and @@ -897,15 +865,18 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,    // selects, switches.    std::vector<User*> Users(LIC->use_begin(), LIC->use_end());    std::vector<Instruction*> Worklist; +  LLVMContext &Context = Val->getContext(); +    // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC    // in the loop with the appropriate one directly. 
-  if (IsEqual || (isa<ConstantInt>(Val) && Val->getType() == Type::Int1Ty)) { +  if (IsEqual || (isa<ConstantInt>(Val) && +      Val->getType() == Type::getInt1Ty(Val->getContext()))) {      Value *Replacement;      if (IsEqual)        Replacement = Val;      else -      Replacement = Context->getConstantInt(Type::Int1Ty,  +      Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),                                        !cast<ConstantInt>(Val)->getZExtValue());      for (unsigned i = 0, e = Users.size(); i != e; ++i) @@ -937,27 +908,35 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,                // FIXME: This is a hack.  We need to keep the successor around                // and hooked up so as to preserve the loop structure, because                // trying to update it is complicated.  So instead we preserve the -              // loop structure and put the block on an dead code path. -               -              BasicBlock *SISucc = SI->getSuccessor(i); -              BasicBlock* Old = SI->getParent(); -              BasicBlock* Split = SplitBlock(Old, SI, this); -               -              Instruction* OldTerm = Old->getTerminator(); -              BranchInst::Create(Split, SISucc, -                                 Context->getConstantIntTrue(), OldTerm); - -              LPM->deleteSimpleAnalysisValue(Old->getTerminator(), L); -              Old->getTerminator()->eraseFromParent(); -               -              PHINode *PN; -              for (BasicBlock::iterator II = SISucc->begin(); -                   (PN = dyn_cast<PHINode>(II)); ++II) { -                Value *InVal = PN->removeIncomingValue(Split, false); -                PN->addIncoming(InVal, Old); -              } - -              SI->removeCase(i); +              // loop structure and put the block on a dead code path. 
+              BasicBlock *Switch = SI->getParent(); +              SplitEdge(Switch, SI->getSuccessor(i), this); +              // Compute the successors instead of relying on the return value +              // of SplitEdge, since it may have split the switch successor +              // after PHI nodes. +              BasicBlock *NewSISucc = SI->getSuccessor(i); +              BasicBlock *OldSISucc = *succ_begin(NewSISucc); +              // Create an "unreachable" destination. +              BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", +                                                     Switch->getParent(), +                                                     OldSISucc); +              new UnreachableInst(Context, Abort); +              // Force the new case destination to branch to the "unreachable" +              // block while maintaining a (dead) CFG edge to the old block. +              NewSISucc->getTerminator()->eraseFromParent(); +              BranchInst::Create(Abort, OldSISucc, +                                 ConstantInt::getTrue(Context), NewSISucc); +              // Release the PHI operands for this edge. +              for (BasicBlock::iterator II = NewSISucc->begin(); +                   PHINode *PN = dyn_cast<PHINode>(II); ++II) +                PN->setIncomingValue(PN->getBasicBlockIndex(Switch), +                                     UndefValue::get(PN->getType())); +              // Tell the domtree about the new block. We don't fully update the +              // domtree here -- instead we force it to do a full recomputation +              // after the pass is complete -- but we do need to inform it of +              // new blocks. 
+              if (DT) +                DT->addNewBlock(Abort, NewSISucc);                break;              }            } @@ -971,7 +950,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,    SimplifyCode(Worklist, L);  } -/// SimplifyCode - Okay, now that we have simplified some instructions in the  +/// SimplifyCode - Okay, now that we have simplified some instructions in the  /// loop, walk over it and constant prop, dce, and fold control flow where  /// possible.  Note that this is effectively a very simple loop-structure-aware  /// optimizer.  During processing of this loop, L could very well be deleted, so @@ -986,14 +965,14 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {      Worklist.pop_back();      // Simple constant folding. -    if (Constant *C = ConstantFoldInstruction(I)) { +    if (Constant *C = ConstantFoldInstruction(I, I->getContext())) {        ReplaceUsesOfWith(I, C, Worklist, L, LPM);        continue;      }      // Simple DCE.      if (isInstructionTriviallyDead(I)) { -      DOUT << "Remove dead instruction '" << *I; +      DEBUG(errs() << "Remove dead instruction '" << *I);        // Add uses to the worklist, which may be dead now.        
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -1017,10 +996,11 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {        break;      case Instruction::And:        if (isa<ConstantInt>(I->getOperand(0)) &&  -          I->getOperand(0)->getType() == Type::Int1Ty)   // constant -> RHS +          // constant -> RHS +          I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext()))             cast<BinaryOperator>(I)->swapOperands();        if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1)))  -        if (CB->getType() == Type::Int1Ty) { +        if (CB->getType() == Type::getInt1Ty(I->getContext())) {            if (CB->isOne())      // X & 1 -> X              ReplaceUsesOfWith(I, I->getOperand(0), Worklist, L, LPM);            else                  // X & 0 -> 0 @@ -1030,10 +1010,11 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {        break;      case Instruction::Or:        if (isa<ConstantInt>(I->getOperand(0)) && -          I->getOperand(0)->getType() == Type::Int1Ty)   // constant -> RHS +          // constant -> RHS +          I->getOperand(0)->getType() == Type::getInt1Ty(I->getContext()))          cast<BinaryOperator>(I)->swapOperands();        if (ConstantInt *CB = dyn_cast<ConstantInt>(I->getOperand(1))) -        if (CB->getType() == Type::Int1Ty) { +        if (CB->getType() == Type::getInt1Ty(I->getContext())) {            if (CB->isOne())   // X | 1 -> 1              ReplaceUsesOfWith(I, I->getOperand(1), Worklist, L, LPM);            else                  // X | 0 -> X @@ -1052,8 +1033,8 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {          if (!SinglePred) continue;  // Nothing to do.          
assert(SinglePred == Pred && "CFG broken"); -        DOUT << "Merging blocks: " << Pred->getName() << " <- "  -             << Succ->getName() << "\n"; +        DEBUG(errs() << "Merging blocks: " << Pred->getName() << " <- "  +              << Succ->getName() << "\n");          // Resolve any single entry PHI nodes in Succ.          while (PHINode *PN = dyn_cast<PHINode>(Succ->begin())) @@ -1080,7 +1061,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {          // remove dead blocks.          break;  // FIXME: Enable. -        DOUT << "Folded branch: " << *BI; +        DEBUG(errs() << "Folded branch: " << *BI);          BasicBlock *DeadSucc = BI->getSuccessor(CB->getZExtValue());          BasicBlock *LiveSucc = BI->getSuccessor(!CB->getZExtValue());          DeadSucc->removePredecessor(BI->getParent(), true); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 3c7a5ab8f4d3..c922814833c5 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -24,29 +24,33 @@  #include "llvm/Analysis/MemoryDependenceAnalysis.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Target/TargetData.h"  #include <list>  using namespace llvm;  STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");  STATISTIC(NumMemSetInfer, "Number of memsets inferred"); +STATISTIC(NumMoveToCpy,   "Number of memmoves converted to memcpy");  /// isBytewiseValue - If the specified value can be set by repeating the same  /// byte in memory, return the i8 value that it is represented with.  This is  /// true for all i8 values obviously, but is also true for i32 0, i32 -1,  /// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated  /// byte store (e.g. i16 0x1234), return null. 
-static Value *isBytewiseValue(Value *V, LLVMContext* Context) { +static Value *isBytewiseValue(Value *V) { +  LLVMContext &Context = V->getContext(); +      // All byte-wide stores are splatable, even of arbitrary variables. -  if (V->getType() == Type::Int8Ty) return V; +  if (V->getType() == Type::getInt8Ty(Context)) return V;    // Constant float and double values can be handled as integer values if the    // corresponding integer value is "byteable".  An important case is 0.0.     if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) { -    if (CFP->getType() == Type::FloatTy) -      V = Context->getConstantExprBitCast(CFP, Type::Int32Ty); -    if (CFP->getType() == Type::DoubleTy) -      V = Context->getConstantExprBitCast(CFP, Type::Int64Ty); +    if (CFP->getType()->isFloatTy()) +      V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(Context)); +    if (CFP->getType()->isDoubleTy()) +      V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(Context));      // Don't handle long double formats, which have strange constraints.    
} @@ -69,7 +73,7 @@ static Value *isBytewiseValue(Value *V, LLVMContext* Context) {          if (Val != Val2)            return 0;        } -      return Context->getConstantInt(Val); +      return ConstantInt::get(Context, Val);      }    } @@ -271,6 +275,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {    if (Start < I->Start) {      I->Start = Start;      I->StartPtr = SI->getPointerOperand(); +    I->Alignment = SI->getAlignment();    }    // Now we know that Start <= I->End and Start >= I->Start (so the startpoint @@ -295,8 +300,7 @@ void MemsetRanges::addStore(int64_t Start, StoreInst *SI) {  //===----------------------------------------------------------------------===//  namespace { - -  class VISIBILITY_HIDDEN MemCpyOpt : public FunctionPass { +  class MemCpyOpt : public FunctionPass {      bool runOnFunction(Function &F);    public:      static char ID; // Pass identification, replacement for typeid @@ -309,16 +313,15 @@ namespace {        AU.addRequired<DominatorTree>();        AU.addRequired<MemoryDependenceAnalysis>();        AU.addRequired<AliasAnalysis>(); -      AU.addRequired<TargetData>();        AU.addPreserved<AliasAnalysis>();        AU.addPreserved<MemoryDependenceAnalysis>(); -      AU.addPreserved<TargetData>();      }      // Helper fuctions -    bool processStore(StoreInst *SI, BasicBlock::iterator& BBI); -    bool processMemCpy(MemCpyInst* M); -    bool performCallSlotOptzn(MemCpyInst* cpy, CallInst* C); +    bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); +    bool processMemCpy(MemCpyInst *M); +    bool processMemMove(MemMoveInst *M); +    bool performCallSlotOptzn(MemCpyInst *cpy, CallInst *C);      bool iterateOnFunction(Function &F);    }; @@ -337,27 +340,31 @@ static RegisterPass<MemCpyOpt> X("memcpyopt",  /// some other patterns to fold away.  In particular, this looks for stores to  /// neighboring locations of memory.  
If it sees enough consequtive ones  /// (currently 4) it attempts to merge them together into a memcpy/memset. -bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) { +bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {    if (SI->isVolatile()) return false; +  LLVMContext &Context = SI->getContext(); +    // There are two cases that are interesting for this code to handle: memcpy    // and memset.  Right now we only handle memset.    // Ensure that the value being stored is something that can be memset'able a    // byte at a time like "0" or "-1" or any width, as well as things like    // 0xA0A0A0A0 and 0.0. -  Value *ByteVal = isBytewiseValue(SI->getOperand(0), Context); +  Value *ByteVal = isBytewiseValue(SI->getOperand(0));    if (!ByteVal)      return false; -  TargetData &TD = getAnalysis<TargetData>(); +  TargetData *TD = getAnalysisIfAvailable<TargetData>(); +  if (!TD) return false;    AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); +  Module *M = SI->getParent()->getParent()->getParent();    // Okay, so we now have a single store that can be splatable.  Scan to find    // all subsequent stores of the same value to offset from the same pointer.    // Join these together into ranges, so we can decide whether contiguous blocks    // are stored. -  MemsetRanges Ranges(TD); +  MemsetRanges Ranges(*TD);    Value *StartPtr = SI->getPointerOperand(); @@ -385,12 +392,12 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {      if (NextStore->isVolatile()) break;      // Check to see if this stored value is of the same byte-splattable value. -    if (ByteVal != isBytewiseValue(NextStore->getOperand(0), Context)) +    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))        break;      // Check to see if this store is to a constant offset from the start ptr.      
int64_t Offset; -    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, TD)) +    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))        break;      Ranges.addStore(Offset, NextStore); @@ -405,7 +412,6 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {    // store as well.  We try to avoid this unless there is at least something    // interesting as a small compile-time optimization.    Ranges.addStore(0, SI); -    Function *MemSetF = 0; @@ -419,7 +425,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {      if (Range.TheStores.size() == 1) continue;      // If it is profitable to lower this range to memset, do so now. -    if (!Range.isProfitableToUseMemset(TD)) +    if (!Range.isProfitableToUseMemset(*TD))        continue;      // Otherwise, we do want to transform this!  Create a new memset.  We put @@ -429,37 +435,38 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator& BBI) {      BasicBlock::iterator InsertPt = BI;      if (MemSetF == 0) { -      const Type *Tys[] = {Type::Int64Ty}; -      MemSetF = Intrinsic::getDeclaration(SI->getParent()->getParent() -                                          ->getParent(), Intrinsic::memset, -                                          Tys, 1); -   } +      const Type *Ty = Type::getInt64Ty(Context); +      MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, &Ty, 1); +    }      // Get the starting pointer of the block.      StartPtr = Range.StartPtr;      // Cast the start ptr to be i8* as memset requires. 
-    const Type *i8Ptr = Context->getPointerTypeUnqual(Type::Int8Ty); +    const Type *i8Ptr = Type::getInt8PtrTy(Context);      if (StartPtr->getType() != i8Ptr) -      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getNameStart(), +      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),                                   InsertPt);      Value *Ops[] = {        StartPtr, ByteVal,   // Start, value -      Context->getConstantInt(Type::Int64Ty, Range.End-Range.Start),  // size -      Context->getConstantInt(Type::Int32Ty, Range.Alignment)   // align +      // size +      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start), +      // align +      ConstantInt::get(Type::getInt32Ty(Context), Range.Alignment)      };      Value *C = CallInst::Create(MemSetF, Ops, Ops+4, "", InsertPt); -    DEBUG(cerr << "Replace stores:\n"; +    DEBUG(errs() << "Replace stores:\n";            for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) -            cerr << *Range.TheStores[i]; -          cerr << "With: " << *C); C=C; +            errs() << *Range.TheStores[i]; +          errs() << "With: " << *C); C=C;      // Don't invalidate the iterator      BBI = BI;      // Zap all the stores. -    for (SmallVector<StoreInst*, 16>::const_iterator SI = Range.TheStores.begin(), +    for (SmallVector<StoreInst*, 16>::const_iterator +         SI = Range.TheStores.begin(),           SE = Range.TheStores.end(); SI != SE; ++SI)        (*SI)->eraseFromParent();      ++NumMemSetInfer; @@ -490,29 +497,30 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {    // Deliberately get the source and destination with bitcasts stripped away,    // because we'll need to do type comparisons based on the underlying type. 
-  Value* cpyDest = cpy->getDest(); -  Value* cpySrc = cpy->getSource(); +  Value *cpyDest = cpy->getDest(); +  Value *cpySrc = cpy->getSource();    CallSite CS = CallSite::get(C);    // We need to be able to reason about the size of the memcpy, so we require    // that it be a constant. -  ConstantInt* cpyLength = dyn_cast<ConstantInt>(cpy->getLength()); +  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());    if (!cpyLength)      return false;    // Require that src be an alloca.  This simplifies the reasoning considerably. -  AllocaInst* srcAlloca = dyn_cast<AllocaInst>(cpySrc); +  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);    if (!srcAlloca)      return false;    // Check that all of src is copied to dest. -  TargetData& TD = getAnalysis<TargetData>(); +  TargetData *TD = getAnalysisIfAvailable<TargetData>(); +  if (!TD) return false; -  ConstantInt* srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); +  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());    if (!srcArraySize)      return false; -  uint64_t srcSize = TD.getTypeAllocSize(srcAlloca->getAllocatedType()) * +  uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *      srcArraySize->getZExtValue();    if (cpyLength->getZExtValue() < srcSize) @@ -521,25 +529,25 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {    // Check that accessing the first srcSize bytes of dest will not cause a    // trap.  Otherwise the transform is invalid since it might cause a trap    // to occur earlier than it otherwise would. -  if (AllocaInst* A = dyn_cast<AllocaInst>(cpyDest)) { +  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {      // The destination is an alloca.  Check it is larger than srcSize. 
-    ConstantInt* destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); +    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());      if (!destArraySize)        return false; -    uint64_t destSize = TD.getTypeAllocSize(A->getAllocatedType()) * +    uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *        destArraySize->getZExtValue();      if (destSize < srcSize)        return false; -  } else if (Argument* A = dyn_cast<Argument>(cpyDest)) { +  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {      // If the destination is an sret parameter then only accesses that are      // outside of the returned struct type can trap.      if (!A->hasStructRetAttr())        return false; -    const Type* StructTy = cast<PointerType>(A->getType())->getElementType(); -    uint64_t destSize = TD.getTypeAllocSize(StructTy); +    const Type *StructTy = cast<PointerType>(A->getType())->getElementType(); +    uint64_t destSize = TD->getTypeAllocSize(StructTy);      if (destSize < srcSize)        return false; @@ -554,14 +562,14 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {    SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),                                     srcAlloca->use_end());    while (!srcUseList.empty()) { -    User* UI = srcUseList.back(); +    User *UI = srcUseList.back();      srcUseList.pop_back();      if (isa<BitCastInst>(UI)) {        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();             I != E; ++I)          srcUseList.push_back(*I); -    } else if (GetElementPtrInst* G = dyn_cast<GetElementPtrInst>(UI)) { +    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {        if (G->hasAllZeroIndices())          for (User::use_iterator I = UI->use_begin(), E = UI->use_end();               I != E; ++I) @@ -575,8 +583,8 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {    // Since we're changing the parameter to the callsite, we need to 
make sure    // that what would be the new parameter dominates the callsite. -  DominatorTree& DT = getAnalysis<DominatorTree>(); -  if (Instruction* cpyDestInst = dyn_cast<Instruction>(cpyDest)) +  DominatorTree &DT = getAnalysis<DominatorTree>(); +  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))      if (!DT.dominates(cpyDestInst, C))        return false; @@ -584,7 +592,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {    // unexpected manner, for example via a global, which we deduce from    // the use analysis, we also need to know that it does not sneakily    // access dest.  We rely on AA to figure this out for us. -  AliasAnalysis& AA = getAnalysis<AliasAnalysis>(); +  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();    if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=        AliasAnalysis::NoModRef)      return false; @@ -597,11 +605,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {          cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),                                                cpyDest->getName(), C);        changedArgument = true; -      if (CS.getArgument(i)->getType() != cpyDest->getType()) -        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,  -                       CS.getArgument(i)->getType(), cpyDest->getName(), C)); -      else +      if (CS.getArgument(i)->getType() == cpyDest->getType())          CS.setArgument(i, cpyDest); +      else +        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,  +                          CS.getArgument(i)->getType(), cpyDest->getName(), C));      }    if (!changedArgument) @@ -609,7 +617,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {    // Drop any cached information about the call, because we may have changed    // its dependence information by changing its parameter. 
-  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); +  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();    MD.removeInstruction(C);    // Remove the memcpy @@ -624,22 +632,22 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {  /// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be  /// a memcpy from X to Z (or potentially a memmove, depending on circumstances).  ///  This allows later passes to remove the first memcpy altogether. -bool MemCpyOpt::processMemCpy(MemCpyInst* M) { -  MemoryDependenceAnalysis& MD = getAnalysis<MemoryDependenceAnalysis>(); +bool MemCpyOpt::processMemCpy(MemCpyInst *M) { +  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();    // The are two possible optimizations we can do for memcpy: -  //   a) memcpy-memcpy xform which exposes redundance for DSE -  //   b) call-memcpy xform for return slot optimization +  //   a) memcpy-memcpy xform which exposes redundance for DSE. +  //   b) call-memcpy xform for return slot optimization.    MemDepResult dep = MD.getDependency(M);    if (!dep.isClobber())      return false;    if (!isa<MemCpyInst>(dep.getInst())) { -    if (CallInst* C = dyn_cast<CallInst>(dep.getInst())) +    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))        return performCallSlotOptzn(M, C);      return false;    } -  MemCpyInst* MDep = cast<MemCpyInst>(dep.getInst()); +  MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());    // We can only transforms memcpy's where the dest of one is the source of the    // other @@ -648,8 +656,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {    // Second, the length of the memcpy's must be the same, or the preceeding one    // must be larger than the following one. 
-  ConstantInt* C1 = dyn_cast<ConstantInt>(MDep->getLength()); -  ConstantInt* C2 = dyn_cast<ConstantInt>(M->getLength()); +  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); +  ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());    if (!C1 || !C2)      return false; @@ -661,7 +669,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {    // Finally, we have to make sure that the dest of the second does not    // alias the source of the first -  AliasAnalysis& AA = getAnalysis<AliasAnalysis>(); +  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();    if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=        AliasAnalysis::NoAlias)      return false; @@ -673,17 +681,16 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {      return false;    // If all checks passed, then we can transform these memcpy's -  const Type *Tys[1]; -  Tys[0] = M->getLength()->getType(); -  Function* MemCpyFun = Intrinsic::getDeclaration( +  const Type *Ty = M->getLength()->getType(); +  Function *MemCpyFun = Intrinsic::getDeclaration(                                   M->getParent()->getParent()->getParent(), -                                 M->getIntrinsicID(), Tys, 1); +                                 M->getIntrinsicID(), &Ty, 1);    Value *Args[4] = {      M->getRawDest(), MDep->getRawSource(), M->getLength(), M->getAlignmentCst()    }; -  CallInst* C = CallInst::Create(MemCpyFun, Args, Args+4, "", M); +  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+4, "", M);    // If C and M don't interfere, then this is a valid transformation.  If they @@ -702,41 +709,78 @@ bool MemCpyOpt::processMemCpy(MemCpyInst* M) {    return false;  } -// MemCpyOpt::runOnFunction - This is the main transformation entry point for a -// function. -// -bool MemCpyOpt::runOnFunction(Function& F) { +/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst +/// are guaranteed not to alias. 
+bool MemCpyOpt::processMemMove(MemMoveInst *M) { +  AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); + +  // If the memmove is a constant size, use it for the alias query, this allows +  // us to optimize things like: memmove(P, P+64, 64); +  uint64_t MemMoveSize = ~0ULL; +  if (ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength())) +    MemMoveSize = Len->getZExtValue(); -  bool changed = false; -  bool shouldContinue = true; +  // See if the pointers alias. +  if (AA.alias(M->getRawDest(), MemMoveSize, M->getRawSource(), MemMoveSize) != +      AliasAnalysis::NoAlias) +    return false; -  while (shouldContinue) { -    shouldContinue = iterateOnFunction(F); -    changed |= shouldContinue; -  } +  DEBUG(errs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); -  return changed; -} +  // If not, then we know we can transform this. +  Module *Mod = M->getParent()->getParent()->getParent(); +  const Type *Ty = M->getLength()->getType(); +  M->setOperand(0, Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, &Ty, 1)); +  // MemDep may have over conservative information about this instruction, just +  // conservatively flush it from the cache. +  getAnalysis<MemoryDependenceAnalysis>().removeInstruction(M); -// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN +  ++NumMoveToCpy; +  return true; +} +   + +// MemCpyOpt::iterateOnFunction - Executes one iteration of GVN.  bool MemCpyOpt::iterateOnFunction(Function &F) { -  bool changed_function = false; +  bool MadeChange = false; -  // Walk all instruction in the function +  // Walk all instruction in the function.    for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();           BI != BE;) { -      // Avoid invalidating the iterator -      Instruction* I = BI++; +      // Avoid invalidating the iterator. 
+      Instruction *I = BI++;        if (StoreInst *SI = dyn_cast<StoreInst>(I)) -        changed_function |= processStore(SI, BI); -      else if (MemCpyInst* M = dyn_cast<MemCpyInst>(I)) { -        changed_function |= processMemCpy(M); +        MadeChange |= processStore(SI, BI); +      else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) +        MadeChange |= processMemCpy(M); +      else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) { +        if (processMemMove(M)) { +          --BI;         // Reprocess the new memcpy. +          MadeChange = true; +        }        }      }    } -  return changed_function; +  return MadeChange; +} + +// MemCpyOpt::runOnFunction - This is the main transformation entry point for a +// function. +// +bool MemCpyOpt::runOnFunction(Function &F) { +  bool MadeChange = false; +  while (1) { +    if (!iterateOnFunction(F)) +      break; +    MadeChange = true; +  } +   +  return MadeChange;  } + + + diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index fa60a9dba3b5..e6ffac251b7b 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -31,9 +31,9 @@  #include "llvm/Pass.h"  #include "llvm/Assembly/Writer.h"  #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/PostOrderIterator.h"  #include "llvm/ADT/Statistic.h"  #include <algorithm> @@ -46,7 +46,7 @@ STATISTIC(NumAnnihil, "Number of expr tree annihilated");  STATISTIC(NumFactor , "Number of multiplies factored");  namespace { -  struct VISIBILITY_HIDDEN ValueEntry { +  struct ValueEntry {      unsigned Rank;      Value *Op;      ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {} @@ -61,17 +61,17 @@ namespace {  ///  static void PrintOps(Instruction *I, const std::vector<ValueEntry> &Ops) {    Module *M = I->getParent()->getParent()->getParent(); -  cerr << 
Instruction::getOpcodeName(I->getOpcode()) << " " +  errs() << Instruction::getOpcodeName(I->getOpcode()) << " "         << *Ops[0].Op->getType();    for (unsigned i = 0, e = Ops.size(); i != e; ++i) { -    WriteAsOperand(*cerr.stream() << " ", Ops[i].Op, false, M); -    cerr << "," << Ops[i].Rank; +    WriteAsOperand(errs() << " ", Ops[i].Op, false, M); +    errs() << "," << Ops[i].Rank;    }  }  #endif  namespace { -  class VISIBILITY_HIDDEN Reassociate : public FunctionPass { +  class Reassociate : public FunctionPass {      std::map<BasicBlock*, unsigned> RankMap;      std::map<AssertingVH<>, unsigned> ValueRankMap;      bool MadeChange; @@ -181,8 +181,8 @@ unsigned Reassociate::getRank(Value *V) {        (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I)))      ++Rank; -  //DOUT << "Calculated Rank[" << V->getName() << "] = " -  //     << Rank << "\n"; +  //DEBUG(errs() << "Calculated Rank[" << V->getName() << "] = " +  //     << Rank << "\n");    return CachedRank = Rank;  } @@ -200,8 +200,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {  ///  static Instruction *LowerNegateToMultiply(Instruction *Neg,                                std::map<AssertingVH<>, unsigned> &ValueRankMap, -                              LLVMContext* Context) { -  Constant *Cst = Context->getConstantIntAllOnesValue(Neg->getType()); +                              LLVMContext &Context) { +  Constant *Cst = Constant::getAllOnesValue(Neg->getType());    Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);    ValueRankMap.erase(Neg); @@ -222,7 +222,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {           isReassociableOp(RHS, I->getOpcode()) &&           "Not an expression that needs linearization?"); -  DOUT << "Linear" << *LHS << *RHS << *I; +  DEBUG(errs() << "Linear" << *LHS << '\n' << *RHS << '\n' << *I << '\n');    // Move the RHS instruction to live immediately before I, avoiding breaking    // dominator properties. 
@@ -235,7 +235,7 @@ void Reassociate::LinearizeExpr(BinaryOperator *I) {    ++NumLinear;    MadeChange = true; -  DOUT << "Linearized: " << *I; +  DEBUG(errs() << "Linearized: " << *I << '\n');    // If D is part of this expression tree, tail recurse.    if (isReassociableOp(I->getOperand(1), I->getOpcode())) @@ -256,6 +256,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,                                      std::vector<ValueEntry> &Ops) {    Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);    unsigned Opcode = I->getOpcode(); +  LLVMContext &Context = I->getContext();    // First step, linearize the expression if it is in ((A+B)+(C+D)) form.    BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode); @@ -284,8 +285,8 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,        Ops.push_back(ValueEntry(getRank(RHS), RHS));        // Clear the leaves out. -      I->setOperand(0, Context->getUndef(I->getType())); -      I->setOperand(1, Context->getUndef(I->getType())); +      I->setOperand(0, UndefValue::get(I->getType())); +      I->setOperand(1, UndefValue::get(I->getType()));        return;      } else {        // Turn X+(Y+Z) -> (Y+Z)+X @@ -320,7 +321,7 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,    Ops.push_back(ValueEntry(getRank(RHS), RHS));    // Clear the RHS leaf out. 
-  I->setOperand(1, Context->getUndef(I->getType())); +  I->setOperand(1, UndefValue::get(I->getType()));  }  // RewriteExprTree - Now that the operands for this expression tree are @@ -333,10 +334,10 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,      if (I->getOperand(0) != Ops[i].Op ||          I->getOperand(1) != Ops[i+1].Op) {        Value *OldLHS = I->getOperand(0); -      DOUT << "RA: " << *I; +      DEBUG(errs() << "RA: " << *I << '\n');        I->setOperand(0, Ops[i].Op);        I->setOperand(1, Ops[i+1].Op); -      DOUT << "TO: " << *I; +      DEBUG(errs() << "TO: " << *I << '\n');        MadeChange = true;        ++NumChanged; @@ -349,9 +350,9 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,    assert(i+2 < Ops.size() && "Ops index out of range!");    if (I->getOperand(1) != Ops[i].Op) { -    DOUT << "RA: " << *I; +    DEBUG(errs() << "RA: " << *I << '\n');      I->setOperand(1, Ops[i].Op); -    DOUT << "TO: " << *I; +    DEBUG(errs() << "TO: " << *I << '\n');      MadeChange = true;      ++NumChanged;    } @@ -373,7 +374,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,  // version of the value is returned, and BI is left pointing at the instruction  // that should be processed next by the reassociation pass.  // -static Value *NegateValue(Value *V, Instruction *BI) { +static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {    // We are trying to expose opportunity for reassociation.  One of the things    // that we want to do to achieve this is to push a negation as deep into an    // expression chain as possible, to expose the add instructions.  In practice, @@ -386,8 +387,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {    if (Instruction *I = dyn_cast<Instruction>(V))      if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {        // Push the negates through the add. 
-      I->setOperand(0, NegateValue(I->getOperand(0), BI)); -      I->setOperand(1, NegateValue(I->getOperand(1), BI)); +      I->setOperand(0, NegateValue(Context, I->getOperand(0), BI)); +      I->setOperand(1, NegateValue(Context, I->getOperand(1), BI));        // We must move the add instruction here, because the neg instructions do        // not dominate the old add instruction in general.  By moving it, we are @@ -407,7 +408,7 @@ static Value *NegateValue(Value *V, Instruction *BI) {  /// ShouldBreakUpSubtract - Return true if we should break up this subtract of  /// X-Y into (X + -Y). -static bool ShouldBreakUpSubtract(Instruction *Sub) { +static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {    // If this is a negation, we can't split it up!    if (BinaryOperator::isNeg(Sub))      return false; @@ -431,7 +432,7 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {  /// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is  /// only used by an add, transform this into (X+(0-Y)) to promote better  /// reassociation. -static Instruction *BreakUpSubtract(Instruction *Sub, +static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,                                std::map<AssertingVH<>, unsigned> &ValueRankMap) {    // Convert a subtract into an add and a neg instruction... so that sub    // instructions can be commuted with other add instructions... @@ -439,7 +440,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,    // Calculate the negative value of Operand 1 of the sub instruction...    // and set it as the RHS of the add instruction we just made...    
// -  Value *NegVal = NegateValue(Sub->getOperand(1), Sub); +  Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub);    Instruction *New =      BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);    New->takeName(Sub); @@ -449,7 +450,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub,    Sub->replaceAllUsesWith(New);    Sub->eraseFromParent(); -  DOUT << "Negated: " << *New; +  DEBUG(errs() << "Negated: " << *New << '\n');    return New;  } @@ -458,16 +459,16 @@ static Instruction *BreakUpSubtract(Instruction *Sub,  /// reassociation.  static Instruction *ConvertShiftToMul(Instruction *Shl,                                 std::map<AssertingVH<>, unsigned> &ValueRankMap, -                              LLVMContext* Context) { +                              LLVMContext &Context) {    // If an operand of this shift is a reassociable multiply, or if the shift    // is used by a reassociable multiply or add, turn into a multiply.    if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||        (Shl->hasOneUse() &&          (isReassociableOp(Shl->use_back(), Instruction::Mul) ||          isReassociableOp(Shl->use_back(), Instruction::Add)))) { -    Constant *MulCst = Context->getConstantInt(Shl->getType(), 1); +    Constant *MulCst = ConstantInt::get(Shl->getType(), 1);      MulCst = -        Context->getConstantExprShl(MulCst, cast<Constant>(Shl->getOperand(1))); +        ConstantExpr::getShl(MulCst, cast<Constant>(Shl->getOperand(1)));      Instruction *Mul = BinaryOperator::CreateMul(Shl->getOperand(0), MulCst,                                                   "", Shl); @@ -567,7 +568,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,    if (Constant *V1 = dyn_cast<Constant>(Ops[Ops.size()-2].Op))      if (Constant *V2 = dyn_cast<Constant>(Ops.back().Op)) {        Ops.pop_back(); -      Ops.back().Op = Context->getConstantExpr(Opcode, V1, V2); +      Ops.back().Op = ConstantExpr::get(Opcode, V1, V2);        return 
OptimizeExpression(I, Ops);      } @@ -623,10 +624,10 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,          if (FoundX != i) {            if (Opcode == Instruction::And) {   // ...&X&~X = 0              ++NumAnnihil; -            return Context->getNullValue(X->getType()); +            return Constant::getNullValue(X->getType());            } else if (Opcode == Instruction::Or) {   // ...|X|~X = -1              ++NumAnnihil; -            return Context->getConstantIntAllOnesValue(X->getType()); +            return Constant::getAllOnesValue(X->getType());            }          }        } @@ -645,7 +646,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,            assert(Opcode == Instruction::Xor);            if (e == 2) {              ++NumAnnihil; -            return Context->getNullValue(Ops[0].Op->getType()); +            return Constant::getNullValue(Ops[0].Op->getType());            }            // ... X^X -> ...            Ops.erase(Ops.begin()+i, Ops.begin()+i+2); @@ -670,7 +671,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,            // Remove X and -X from the operand list.            if (Ops.size() == 2) {              ++NumAnnihil; -            return Context->getNullValue(X->getType()); +            return Constant::getNullValue(X->getType());            } else {              Ops.erase(Ops.begin()+i);              if (i < FoundX) @@ -727,7 +728,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,      // If any factor occurred more than one time, we can pull it out.      if (MaxOcc > 1) { -      DOUT << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n"; +      DEBUG(errs() << "\nFACTORING [" << MaxOcc << "]: " << *MaxOccVal << "\n");        // Create a new instruction that uses the MaxOccVal twice.  
If we don't do        // this, we could otherwise run into situations where removing a factor @@ -781,6 +782,8 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,  /// ReassociateBB - Inspect all of the instructions in this basic block,  /// reassociating them as we go.  void Reassociate::ReassociateBB(BasicBlock *BB) { +  LLVMContext &Context = BB->getContext(); +      for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {      Instruction *BI = BBI++;      if (BI->getOpcode() == Instruction::Shl && @@ -798,8 +801,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {      // If this is a subtract instruction which is not already in negate form,      // see if we can convert it to X+-Y.      if (BI->getOpcode() == Instruction::Sub) { -      if (ShouldBreakUpSubtract(BI)) { -        BI = BreakUpSubtract(BI, ValueRankMap); +      if (ShouldBreakUpSubtract(Context, BI)) { +        BI = BreakUpSubtract(Context, BI, ValueRankMap);          MadeChange = true;        } else if (BinaryOperator::isNeg(BI)) {          // Otherwise, this is a negation.  See if the operand is a multiply tree @@ -838,7 +841,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {    std::vector<ValueEntry> Ops;    LinearizeExprTree(I, Ops); -  DOUT << "RAIn:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n"; +  DEBUG(errs() << "RAIn:\t"; PrintOps(I, Ops); errs() << "\n");    // Now that we have linearized the tree to a list and have gathered all of    // the operands and their ranks, sort the operands by their rank.  Use a @@ -853,7 +856,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {    if (Value *V = OptimizeExpression(I, Ops)) {      // This expression tree simplified to something that isn't a tree,      // eliminate it. 
-    DOUT << "Reassoc to scalar: " << *V << "\n"; +    DEBUG(errs() << "Reassoc to scalar: " << *V << "\n");      I->replaceAllUsesWith(V);      RemoveDeadBinaryOp(I);      return; @@ -871,7 +874,7 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {      Ops.pop_back();    } -  DOUT << "RAOut:\t"; DEBUG(PrintOps(I, Ops)); DOUT << "\n"; +  DEBUG(errs() << "RAOut:\t"; PrintOps(I, Ops); errs() << "\n");    if (Ops.size() == 1) {      // This expression tree simplified to something that isn't a tree, diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index ac95d25b7f7f..99e12522ce0c 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -26,7 +26,6 @@  #include "llvm/BasicBlock.h"  #include "llvm/Instructions.h"  #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/CFG.h"  #include <list>  using namespace llvm; @@ -35,7 +34,7 @@ STATISTIC(NumRegsDemoted, "Number of registers demoted");  STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");  namespace { -  struct VISIBILITY_HIDDEN RegToMem : public FunctionPass { +  struct RegToMem : public FunctionPass {      static char ID; // Pass identification, replacement for typeid      RegToMem() : FunctionPass(&ID) {} @@ -44,73 +43,17 @@ namespace {        AU.addPreservedID(BreakCriticalEdgesID);      } -   bool valueEscapes(Instruction* i) { -      BasicBlock* bb = i->getParent(); -      for (Value::use_iterator ii = i->use_begin(), ie = i->use_end(); -           ii != ie; ++ii) -        if (cast<Instruction>(*ii)->getParent() != bb || -            isa<PHINode>(*ii)) +   bool valueEscapes(const Instruction *Inst) const { +     const BasicBlock *BB = Inst->getParent(); +      for (Value::use_const_iterator UI = Inst->use_begin(),E = Inst->use_end(); +           UI != E; ++UI) +        if (cast<Instruction>(*UI)->getParent() != BB || +            isa<PHINode>(*UI))            return true;        return false; 
     } -    virtual bool runOnFunction(Function &F) { -      if (!F.isDeclaration()) { -        // Insert all new allocas into entry block. -        BasicBlock* BBEntry = &F.getEntryBlock(); -        assert(pred_begin(BBEntry) == pred_end(BBEntry) && -               "Entry block to function must not have predecessors!"); - -        // Find first non-alloca instruction and create insertion point. This is -        // safe if block is well-formed: it always have terminator, otherwise -        // we'll get and assertion. -        BasicBlock::iterator I = BBEntry->begin(); -        while (isa<AllocaInst>(I)) ++I; - -        CastInst *AllocaInsertionPoint = -          CastInst::Create(Instruction::BitCast, -                           Context->getNullValue(Type::Int32Ty), Type::Int32Ty, -                           "reg2mem alloca point", I); - -        // Find the escaped instructions. But don't create stack slots for -        // allocas in entry block. -        std::list<Instruction*> worklist; -        for (Function::iterator ibb = F.begin(), ibe = F.end(); -             ibb != ibe; ++ibb) -          for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); -               iib != iie; ++iib) { -            if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) && -                valueEscapes(iib)) { -              worklist.push_front(&*iib); -            } -          } - -        // Demote escaped instructions -        NumRegsDemoted += worklist.size(); -        for (std::list<Instruction*>::iterator ilb = worklist.begin(),  -               ile = worklist.end(); ilb != ile; ++ilb) -          DemoteRegToStack(**ilb, false, AllocaInsertionPoint); - -        worklist.clear(); - -        // Find all phi's -        for (Function::iterator ibb = F.begin(), ibe = F.end(); -             ibb != ibe; ++ibb) -          for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); -               iib != iie; ++iib) -            if (isa<PHINode>(iib)) -              
worklist.push_front(&*iib); - -        // Demote phi nodes -        NumPhisDemoted += worklist.size(); -        for (std::list<Instruction*>::iterator ilb = worklist.begin(),  -               ile = worklist.end(); ilb != ile; ++ilb) -          DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint); - -        return true; -      } -      return false; -    } +    virtual bool runOnFunction(Function &F);    };  } @@ -118,6 +61,66 @@ char RegToMem::ID = 0;  static RegisterPass<RegToMem>  X("reg2mem", "Demote all values to stack slots"); + +bool RegToMem::runOnFunction(Function &F) { +  if (F.isDeclaration())  +    return false; +   +  // Insert all new allocas into entry block. +  BasicBlock *BBEntry = &F.getEntryBlock(); +  assert(pred_begin(BBEntry) == pred_end(BBEntry) && +         "Entry block to function must not have predecessors!"); +   +  // Find first non-alloca instruction and create insertion point. This is +  // safe if block is well-formed: it always have terminator, otherwise +  // we'll get and assertion. +  BasicBlock::iterator I = BBEntry->begin(); +  while (isa<AllocaInst>(I)) ++I; +   +  CastInst *AllocaInsertionPoint = +    new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())), +                    Type::getInt32Ty(F.getContext()), +                    "reg2mem alloca point", I); +   +  // Find the escaped instructions. But don't create stack slots for +  // allocas in entry block. 
+  std::list<Instruction*> WorkList; +  for (Function::iterator ibb = F.begin(), ibe = F.end(); +       ibb != ibe; ++ibb) +    for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); +         iib != iie; ++iib) { +      if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) && +          valueEscapes(iib)) { +        WorkList.push_front(&*iib); +      } +    } +   +  // Demote escaped instructions +  NumRegsDemoted += WorkList.size(); +  for (std::list<Instruction*>::iterator ilb = WorkList.begin(),  +       ile = WorkList.end(); ilb != ile; ++ilb) +    DemoteRegToStack(**ilb, false, AllocaInsertionPoint); +   +  WorkList.clear(); +   +  // Find all phi's +  for (Function::iterator ibb = F.begin(), ibe = F.end(); +       ibb != ibe; ++ibb) +    for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); +         iib != iie; ++iib) +      if (isa<PHINode>(iib)) +        WorkList.push_front(&*iib); +   +  // Demote phi nodes +  NumPhisDemoted += WorkList.size(); +  for (std::list<Instruction*>::iterator ilb = WorkList.begin(),  +       ile = WorkList.end(); ilb != ile; ++ilb) +    DemotePHIToStack(cast<PHINode>(*ilb), AllocaInsertionPoint); +   +  return true; +} + +  // createDemoteRegisterToMemory - Provide an entry point to create this pass.  
//  const PassInfo *const llvm::DemoteRegisterToMemoryID = &X; diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index f0bc12734734..b5edf4e05821 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -33,9 +33,10 @@  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Support/CallSite.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/InstVisitor.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/DenseSet.h"  #include "llvm/ADT/SmallSet.h" @@ -58,7 +59,7 @@ namespace {  /// LatticeVal class - This class represents the different lattice values that  /// an LLVM value may occupy.  It is a simple class with value semantics.  /// -class VISIBILITY_HIDDEN LatticeVal { +class LatticeVal {    enum {      /// undefined - This LLVM Value has no known value yet.      undefined, @@ -139,7 +140,7 @@ public:  /// Constant Propagation.  ///  class SCCPSolver : public InstVisitor<SCCPSolver> { -  LLVMContext* Context; +  LLVMContext *Context;    DenseSet<BasicBlock*> BBExecutable;// The basic blocks that are executable    std::map<Value*, LatticeVal> ValueState;  // The state each value is in. @@ -179,12 +180,12 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {    typedef std::pair<BasicBlock*, BasicBlock*> Edge;    DenseSet<Edge> KnownFeasibleEdges;  public: -  void setContext(LLVMContext* C) { Context = C; } +  void setContext(LLVMContext *C) { Context = C; }    /// MarkBlockExecutable - This method can be used by clients to mark all of    /// the blocks that are known to be intrinsically live in the processed unit.    
void MarkBlockExecutable(BasicBlock *BB) { -    DOUT << "Marking Block Executable: " << BB->getNameStart() << "\n"; +    DEBUG(errs() << "Marking Block Executable: " << BB->getName() << "\n");      BBExecutable.insert(BB);   // Basic block is executable!      BBWorkList.push_back(BB);  // Add the block to the work list!    } @@ -260,14 +261,14 @@ private:    //    inline void markConstant(LatticeVal &IV, Value *V, Constant *C) {      if (IV.markConstant(C)) { -      DOUT << "markConstant: " << *C << ": " << *V; +      DEBUG(errs() << "markConstant: " << *C << ": " << *V << '\n');        InstWorkList.push_back(V);      }    }    inline void markForcedConstant(LatticeVal &IV, Value *V, Constant *C) {      IV.markForcedConstant(C); -    DOUT << "markForcedConstant: " << *C << ": " << *V; +    DEBUG(errs() << "markForcedConstant: " << *C << ": " << *V << '\n');      InstWorkList.push_back(V);    } @@ -280,11 +281,11 @@ private:    // work list so that the users of the instruction are updated later.    inline void markOverdefined(LatticeVal &IV, Value *V) {      if (IV.markOverdefined()) { -      DEBUG(DOUT << "markOverdefined: "; +      DEBUG(errs() << "markOverdefined: ";              if (Function *F = dyn_cast<Function>(V)) -              DOUT << "Function '" << F->getName() << "'\n"; +              errs() << "Function '" << F->getName() << "'\n";              else -              DOUT << *V); +              errs() << *V << '\n');        // Only instructions go on the work list        OverdefinedInstWorkList.push_back(V);      } @@ -337,8 +338,8 @@ private:        return;  // This edge is already known to be executable!      
if (BBExecutable.count(Dest)) { -      DOUT << "Marking Edge Executable: " << Source->getNameStart() -           << " -> " << Dest->getNameStart() << "\n"; +      DEBUG(errs() << "Marking Edge Executable: " << Source->getName() +            << " -> " << Dest->getName() << "\n");        // The destination is already executable, but we just made an edge        // feasible that wasn't before.  Revisit the PHI nodes in the block @@ -399,7 +400,9 @@ private:    void visitStoreInst     (Instruction &I);    void visitLoadInst      (LoadInst &I);    void visitGetElementPtrInst(GetElementPtrInst &I); -  void visitCallInst      (CallInst &I) { visitCallSite(CallSite::get(&I)); } +  void visitCallInst      (CallInst &I) {  +    visitCallSite(CallSite::get(&I)); +  }    void visitInvokeInst    (InvokeInst &II) {      visitCallSite(CallSite::get(&II));      visitTerminatorInst(II); @@ -414,7 +417,7 @@ private:    void visitInstruction(Instruction &I) {      // If a new instruction is added to LLVM that we don't handle... 
-    cerr << "SCCP: Don't know how to handle: " << I; +    errs() << "SCCP: Don't know how to handle: " << I;      markOverdefined(&I);   // Just in case    }  }; @@ -440,7 +443,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,          Succs[0] = Succs[1] = true;        } else if (BCValue.isConstant()) {          // Constant condition variables mean the branch can only go a single way -        Succs[BCValue.getConstant() == Context->getConstantIntFalse()] = true; +        Succs[BCValue.getConstant() == ConstantInt::getFalse(*Context)] = true;        }      }    } else if (isa<InvokeInst>(&TI)) { @@ -455,7 +458,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,      } else if (SCValue.isConstant())        Succs[SI->findCaseValue(cast<ConstantInt>(SCValue.getConstant()))] = true;    } else { -    assert(0 && "SCCP: Don't know how to handle this terminator!"); +    llvm_unreachable("SCCP: Don't know how to handle this terminator!");    }  } @@ -485,7 +488,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {          // Constant condition variables mean the branch can only go a single way          return BI->getSuccessor(BCValue.getConstant() == -                                       Context->getConstantIntFalse()) == To; +                                       ConstantInt::getFalse(*Context)) == To;        }        return false;      } @@ -513,8 +516,10 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {      }      return false;    } else { -    cerr << "Unknown terminator instruction: " << *TI; -    abort(); +#ifndef NDEBUG +    errs() << "Unknown terminator instruction: " << *TI << '\n'; +#endif +    llvm_unreachable(0);    }  } @@ -642,7 +647,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {        DenseMap<std::pair<Function*, unsigned>, LatticeVal>::iterator          It = TrackedMultipleRetVals.find(std::make_pair(F, i));        if (It == TrackedMultipleRetVals.end()) break; -      if 
(Value *Val = FindInsertedValue(I.getOperand(0), i)) +      if (Value *Val = FindInsertedValue(I.getOperand(0), i, I.getContext()))          mergeInValue(It->second, F, getValueState(Val));      }    } @@ -666,7 +671,7 @@ void SCCPSolver::visitCastInst(CastInst &I) {    if (VState.isOverdefined())          // Inherit overdefinedness of operand      markOverdefined(&I);    else if (VState.isConstant())        // Propagate constant value -    markConstant(&I, Context->getConstantExprCast(I.getOpcode(),  +    markConstant(&I, ConstantExpr::getCast(I.getOpcode(),                                              VState.getConstant(), I.getType()));  } @@ -809,12 +814,12 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {          if (NonOverdefVal->isUndefined()) {            // Could annihilate value.            if (I.getOpcode() == Instruction::And) -            markConstant(IV, &I, Context->getNullValue(I.getType())); +            markConstant(IV, &I, Constant::getNullValue(I.getType()));            else if (const VectorType *PT = dyn_cast<VectorType>(I.getType())) -            markConstant(IV, &I, Context->getConstantVectorAllOnesValue(PT)); +            markConstant(IV, &I, Constant::getAllOnesValue(PT));            else              markConstant(IV, &I, -                         Context->getConstantIntAllOnesValue(I.getType())); +                         Constant::getAllOnesValue(I.getType()));            return;          } else {            if (I.getOpcode() == Instruction::And) { @@ -859,7 +864,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {                break;  // Cannot fold this operation over the PHI nodes!              
} else if (In1.isConstant() && In2.isConstant()) {                Constant *V = -                     Context->getConstantExpr(I.getOpcode(), In1.getConstant(), +                     ConstantExpr::get(I.getOpcode(), In1.getConstant(),                                                In2.getConstant());                if (Result.isUndefined())                  Result.markConstant(V); @@ -908,7 +913,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {      markOverdefined(IV, &I);    } else if (V1State.isConstant() && V2State.isConstant()) {      markConstant(IV, &I, -                Context->getConstantExpr(I.getOpcode(), V1State.getConstant(), +                ConstantExpr::get(I.getOpcode(), V1State.getConstant(),                                             V2State.getConstant()));    }  } @@ -945,7 +950,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {                Result.markOverdefined();                break;  // Cannot fold this operation over the PHI nodes!              } else if (In1.isConstant() && In2.isConstant()) { -              Constant *V = Context->getConstantExprCompare(I.getPredicate(),  +              Constant *V = ConstantExpr::getCompare(I.getPredicate(),                                                        In1.getConstant(),                                                        In2.getConstant());                if (Result.isUndefined()) @@ -994,7 +999,7 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {      markOverdefined(IV, &I);    } else if (V1State.isConstant() && V2State.isConstant()) { -    markConstant(IV, &I, Context->getConstantExprCompare(I.getPredicate(),  +    markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),                                                     V1State.getConstant(),                                                     V2State.getConstant()));    } @@ -1096,7 +1101,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {    Constant *Ptr = Operands[0];    
Operands.erase(Operands.begin());  // Erase the pointer from idx list... -  markConstant(IV, &I, Context->getConstantExprGetElementPtr(Ptr, &Operands[0], +  markConstant(IV, &I, ConstantExpr::getGetElementPtr(Ptr, &Operands[0],                                                        Operands.size()));  } @@ -1127,10 +1132,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {    if (PtrVal.isConstant() && !I.isVolatile()) {      Value *Ptr = PtrVal.getConstant();      // TODO: Consider a target hook for valid address spaces for this xform. -    if (isa<ConstantPointerNull>(Ptr) &&  -        cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) { +    if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0) {        // load null -> null -      markConstant(IV, &I, Context->getNullValue(I.getType())); +      markConstant(IV, &I, Constant::getNullValue(I.getType()));        return;      } @@ -1179,7 +1183,7 @@ void SCCPSolver::visitCallSite(CallSite CS) {    if (F == 0 || !F->hasLocalLinkage()) {  CallOverdefined:      // Void return and not tracking callee, just bail. -    if (I->getType() == Type::VoidTy) return; +    if (I->getType()->isVoidTy()) return;      // Otherwise, if we have a single return value case, and if the function is      // a declaration, maybe we can constant fold it. 
@@ -1258,6 +1262,10 @@ CallOverdefined:    for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();         AI != E; ++AI, ++CAI) {      LatticeVal &IV = ValueState[AI]; +    if (AI->hasByValAttr() && !F->onlyReadsMemory()) { +      IV.markOverdefined(); +      continue; +    }      if (!IV.isOverdefined())        mergeInValue(IV, AI, getValueState(*CAI));    } @@ -1273,7 +1281,7 @@ void SCCPSolver::Solve() {        Value *I = OverdefinedInstWorkList.back();        OverdefinedInstWorkList.pop_back(); -      DOUT << "\nPopped off OI-WL: " << *I; +      DEBUG(errs() << "\nPopped off OI-WL: " << *I << '\n');        // "I" got into the work list because it either made the transition from        // bottom to constant @@ -1291,7 +1299,7 @@ void SCCPSolver::Solve() {        Value *I = InstWorkList.back();        InstWorkList.pop_back(); -      DOUT << "\nPopped off I-WL: " << *I; +      DEBUG(errs() << "\nPopped off I-WL: " << *I << '\n');        // "I" got into the work list because it either made the transition from        // bottom to constant @@ -1311,7 +1319,7 @@ void SCCPSolver::Solve() {        BasicBlock *BB = BBWorkList.back();        BBWorkList.pop_back(); -      DOUT << "\nPopped off BBWL: " << *BB; +      DEBUG(errs() << "\nPopped off BBWL: " << *BB << '\n');        // Notify all instructions in this basic block that they are newly        // executable. @@ -1345,7 +1353,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {        // Look for instructions which produce undef values. -      if (I->getType() == Type::VoidTy) continue; +      if (I->getType()->isVoidTy()) continue;        LatticeVal &LV = getValueState(I);        if (!LV.isUndefined()) continue; @@ -1371,22 +1379,22 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {          // to be handled here, because we don't know whether the top part is 1's          // or 0's.          
assert(Op0LV.isUndefined()); -        markForcedConstant(LV, I, Context->getNullValue(ITy)); +        markForcedConstant(LV, I, Constant::getNullValue(ITy));          return true;        case Instruction::Mul:        case Instruction::And:          // undef * X -> 0.   X could be zero.          // undef & X -> 0.   X could be zero. -        markForcedConstant(LV, I, Context->getNullValue(ITy)); +        markForcedConstant(LV, I, Constant::getNullValue(ITy));          return true;        case Instruction::Or:          // undef | X -> -1.   X could be -1.          if (const VectorType *PTy = dyn_cast<VectorType>(ITy))            markForcedConstant(LV, I, -                             Context->getConstantVectorAllOnesValue(PTy)); +                             Constant::getAllOnesValue(PTy));          else           -          markForcedConstant(LV, I, Context->getConstantIntAllOnesValue(ITy)); +          markForcedConstant(LV, I, Constant::getAllOnesValue(ITy));          return true;        case Instruction::SDiv: @@ -1399,7 +1407,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {          // undef / X -> 0.   X could be maxint.          // undef % X -> 0.   X could be 1. -        markForcedConstant(LV, I, Context->getNullValue(ITy)); +        markForcedConstant(LV, I, Constant::getNullValue(ITy));          return true;        case Instruction::AShr: @@ -1420,7 +1428,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {          // X >> undef -> 0.  X could be 0.          // X << undef -> 0.  X could be 0. -        markForcedConstant(LV, I, Context->getNullValue(ITy)); +        markForcedConstant(LV, I, Constant::getNullValue(ITy));          return true;        case Instruction::Select:          // undef ? X : Y  -> X or Y.  There could be commonality between X/Y. @@ -1483,7 +1491,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {      // as undef, then further analysis could think the undef went another way      // leading to an inconsistent set of conclusions. 
     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { -      BI->setCondition(Context->getConstantIntFalse()); +      BI->setCondition(ConstantInt::getFalse(*Context));      } else {        SwitchInst *SI = cast<SwitchInst>(TI);        SI->setCondition(SI->getCaseValue(1)); @@ -1502,7 +1510,7 @@ namespace {    /// SCCP Class - This class uses the SCCPSolver to implement a per-function    /// Sparse Conditional Constant Propagator.    /// -  struct VISIBILITY_HIDDEN SCCP : public FunctionPass { +  struct SCCP : public FunctionPass {      static char ID; // Pass identification, replacement for typeid      SCCP() : FunctionPass(&ID) {} @@ -1531,9 +1539,9 @@ FunctionPass *llvm::createSCCPPass() {  // and return true if the function was modified.  //  bool SCCP::runOnFunction(Function &F) { -  DOUT << "SCCP on function '" << F.getNameStart() << "'\n"; +  DEBUG(errs() << "SCCP on function '" << F.getName() << "'\n");    SCCPSolver Solver; -  Solver.setContext(Context); +  Solver.setContext(&F.getContext());    // Mark the first block of the function as being executable.    
Solver.MarkBlockExecutable(F.begin()); @@ -1546,7 +1554,7 @@ bool SCCP::runOnFunction(Function &F) {    bool ResolvedUndefs = true;    while (ResolvedUndefs) {      Solver.Solve(); -    DOUT << "RESOLVING UNDEFs\n"; +    DEBUG(errs() << "RESOLVING UNDEFs\n");      ResolvedUndefs = Solver.ResolvedUndefsIn(F);    } @@ -1561,7 +1569,7 @@ bool SCCP::runOnFunction(Function &F) {    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)      if (!Solver.isBlockExecutable(BB)) { -      DOUT << "  BasicBlock Dead:" << *BB; +      DEBUG(errs() << "  BasicBlock Dead:" << *BB);        ++NumDeadBlocks;        // Delete the instructions backwards, as it has a reduced likelihood of @@ -1573,7 +1581,7 @@ bool SCCP::runOnFunction(Function &F) {          Instruction *I = Insts.back();          Insts.pop_back();          if (!I->use_empty()) -          I->replaceAllUsesWith(Context->getUndef(I->getType())); +          I->replaceAllUsesWith(UndefValue::get(I->getType()));          BB->getInstList().erase(I);          MadeChanges = true;          ++NumInstRemoved; @@ -1584,8 +1592,7 @@ bool SCCP::runOnFunction(Function &F) {        //        for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {          Instruction *Inst = BI++; -        if (Inst->getType() == Type::VoidTy || -            isa<TerminatorInst>(Inst)) +        if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))            continue;          LatticeVal &IV = Values[Inst]; @@ -1593,8 +1600,8 @@ bool SCCP::runOnFunction(Function &F) {            continue;          Constant *Const = IV.isConstant() -          ? IV.getConstant() : Context->getUndef(Inst->getType()); -        DOUT << "  Constant: " << *Const << " = " << *Inst; +          ? IV.getConstant() : UndefValue::get(Inst->getType()); +        DEBUG(errs() << "  Constant: " << *Const << " = " << *Inst);          // Replaces all of the uses of a variable with uses of the constant.          
Inst->replaceAllUsesWith(Const); @@ -1617,7 +1624,7 @@ namespace {    /// IPSCCP Class - This class implements interprocedural Sparse Conditional    /// Constant Propagation.    /// -  struct VISIBILITY_HIDDEN IPSCCP : public ModulePass { +  struct IPSCCP : public ModulePass {      static char ID;      IPSCCP() : ModulePass(&ID) {}      bool runOnModule(Module &M); @@ -1658,7 +1665,10 @@ static bool AddressIsTaken(GlobalValue *GV) {  }  bool IPSCCP::runOnModule(Module &M) { +  LLVMContext *Context = &M.getContext(); +      SCCPSolver Solver; +  Solver.setContext(Context);    // Loop over all functions, marking arguments to those with their addresses    // taken or that are external as overdefined. @@ -1687,7 +1697,7 @@ bool IPSCCP::runOnModule(Module &M) {    while (ResolvedUndefs) {      Solver.Solve(); -    DOUT << "RESOLVING UNDEFS\n"; +    DEBUG(errs() << "RESOLVING UNDEFS\n");      ResolvedUndefs = false;      for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)        ResolvedUndefs |= Solver.ResolvedUndefsIn(*F); @@ -1709,8 +1719,8 @@ bool IPSCCP::runOnModule(Module &M) {          LatticeVal &IV = Values[AI];          if (IV.isConstant() || IV.isUndefined()) {            Constant *CST = IV.isConstant() ? -            IV.getConstant() : Context->getUndef(AI->getType()); -          DOUT << "***  Arg " << *AI << " = " << *CST <<"\n"; +            IV.getConstant() : UndefValue::get(AI->getType()); +          DEBUG(errs() << "***  Arg " << *AI << " = " << *CST <<"\n");            // Replaces all of the uses of a variable with uses of the            // constant. 
@@ -1721,7 +1731,7 @@ bool IPSCCP::runOnModule(Module &M) {      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)        if (!Solver.isBlockExecutable(BB)) { -        DOUT << "  BasicBlock Dead:" << *BB; +        DEBUG(errs() << "  BasicBlock Dead:" << *BB);          ++IPNumDeadBlocks;          // Delete the instructions backwards, as it has a reduced likelihood of @@ -1734,7 +1744,7 @@ bool IPSCCP::runOnModule(Module &M) {            Instruction *I = Insts.back();            Insts.pop_back();            if (!I->use_empty()) -            I->replaceAllUsesWith(Context->getUndef(I->getType())); +            I->replaceAllUsesWith(UndefValue::get(I->getType()));            BB->getInstList().erase(I);            MadeChanges = true;            ++IPNumInstRemoved; @@ -1746,18 +1756,18 @@ bool IPSCCP::runOnModule(Module &M) {              TI->getSuccessor(i)->removePredecessor(BB);          }          if (!TI->use_empty()) -          TI->replaceAllUsesWith(Context->getUndef(TI->getType())); +          TI->replaceAllUsesWith(UndefValue::get(TI->getType()));          BB->getInstList().erase(TI);          if (&*BB != &F->front())            BlocksToErase.push_back(BB);          else -          new UnreachableInst(BB); +          new UnreachableInst(M.getContext(), BB);        } else {          for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {            Instruction *Inst = BI++; -          if (Inst->getType() == Type::VoidTy) +          if (Inst->getType()->isVoidTy())              continue;            LatticeVal &IV = Values[Inst]; @@ -1765,8 +1775,8 @@ bool IPSCCP::runOnModule(Module &M) {              continue;            Constant *Const = IV.isConstant() -            ? IV.getConstant() : Context->getUndef(Inst->getType()); -          DOUT << "  Constant: " << *Const << " = " << *Inst; +            ? 
IV.getConstant() : UndefValue::get(Inst->getType()); +          DEBUG(errs() << "  Constant: " << *Const << " = " << *Inst);            // Replaces all of the uses of a variable with uses of the            // constant. @@ -1802,7 +1812,7 @@ bool IPSCCP::runOnModule(Module &M) {            } else if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {              assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");            } else { -            assert(0 && "Didn't fold away reference to block!"); +            llvm_unreachable("Didn't fold away reference to block!");            }  #endif @@ -1834,12 +1844,12 @@ bool IPSCCP::runOnModule(Module &M) {    for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),           E = RV.end(); I != E; ++I)      if (!I->second.isOverdefined() && -        I->first->getReturnType() != Type::VoidTy) { +        !I->first->getReturnType()->isVoidTy()) {        Function *F = I->first;        for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)          if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))            if (!isa<UndefValue>(RI->getOperand(0))) -            RI->setOperand(0, Context->getUndef(F->getReturnType())); +            RI->setOperand(0, UndefValue::get(F->getReturnType()));      }    // If we infered constant or undef values for globals variables, we can delete @@ -1850,7 +1860,7 @@ bool IPSCCP::runOnModule(Module &M) {      GlobalVariable *GV = I->first;      assert(!I->second.isOverdefined() &&             "Overdefined values should have been taken out of the map!"); -    DOUT << "Found that GV '" << GV->getNameStart() << "' is constant!\n"; +    DEBUG(errs() << "Found that GV '" << GV->getName() << "' is constant!\n");      while (!GV->use_empty()) {        StoreInst *SI = cast<StoreInst>(GV->use_back());        SI->eraseFromParent(); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 
109fb90d52f3..610d874b3684 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -34,13 +34,13 @@  #include "llvm/Transforms/Utils/PromoteMemToReg.h"  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/GetElementPtrTypeIterator.h"  #include "llvm/Support/IRBuilder.h"  #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h"  using namespace llvm;  STATISTIC(NumReplaced,  "Number of allocas broken up"); @@ -49,7 +49,7 @@ STATISTIC(NumConverted, "Number of aggregates converted to scalar");  STATISTIC(NumGlobals,   "Number of allocas copied from constant global");  namespace { -  struct VISIBILITY_HIDDEN SROA : public FunctionPass { +  struct SROA : public FunctionPass {      static char ID; // Pass identification, replacement for typeid      explicit SROA(signed T = -1) : FunctionPass(&ID) {        if (T == -1) @@ -68,7 +68,6 @@ namespace {      virtual void getAnalysisUsage(AnalysisUsage &AU) const {        AU.addRequired<DominatorTree>();        AU.addRequired<DominanceFrontier>(); -      AU.addRequired<TargetData>();        AU.setPreservesCFG();      } @@ -150,9 +149,16 @@ FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {  bool SROA::runOnFunction(Function &F) { -  TD = &getAnalysis<TargetData>(); -   +  TD = getAnalysisIfAvailable<TargetData>(); +    bool Changed = performPromotion(F); + +  // FIXME: ScalarRepl currently depends on TargetData more than it +  // theoretically needs to. It should be refactored in order to support +  // target-independent IR. Until this is done, just skip the actual +  // scalar-replacement portion of this pass. 
+  if (!TD) return Changed; +    while (1) {      bool LocalChange = performScalarRepl(F);      if (!LocalChange) break;   // No need to repromote if no scalarrepl @@ -186,7 +192,7 @@ bool SROA::performPromotion(Function &F) {      if (Allocas.empty()) break; -    PromoteMemToReg(Allocas, DT, DF); +    PromoteMemToReg(Allocas, DT, DF, F.getContext());      NumPromoted += Allocas.size();      Changed = true;    } @@ -238,11 +244,10 @@ bool SROA::performScalarRepl(Function &F) {      // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'      // is only subsequently read.      if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) { -      DOUT << "Found alloca equal to global: " << *AI; -      DOUT << "  memcpy = " << *TheCopy; +      DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n'); +      DEBUG(errs() << "  memcpy = " << *TheCopy << '\n');        Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2)); -      AI->replaceAllUsesWith( -                        Context->getConstantExprBitCast(TheSrc, AI->getType())); +      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));        TheCopy->eraseFromParent();  // Don't mutate the global.        AI->eraseFromParent();        ++NumGlobals; @@ -256,9 +261,12 @@ bool SROA::performScalarRepl(Function &F) {      // value cannot be decomposed at all.      uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); +    // Do not promote [0 x %struct]. +    if (AllocaSize == 0) continue; +      // Do not promote any struct whose size is too big.      if (AllocaSize > SRThreshold) continue; -         +      if ((isa<StructType>(AI->getAllocatedType()) ||           isa<ArrayType>(AI->getAllocatedType())) &&          // Do not promote any struct into more than "32" separate vars. @@ -266,7 +274,7 @@ bool SROA::performScalarRepl(Function &F) {        // Check that all of the users of the allocation are capable of being        // transformed.        
switch (isSafeAllocaToScalarRepl(AI)) { -      default: assert(0 && "Unexpected value!"); +      default: llvm_unreachable("Unexpected value!");        case 0:  // Not safe to scalar replace.          break;        case 1:  // Safe, but requires cleanup/canonicalizations first @@ -298,16 +306,17 @@ bool SROA::performScalarRepl(Function &F) {        // we just get a lot of insert/extracts.  If at least one vector is        // involved, then we probably really do have a union of vector/array.        if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) { -        DOUT << "CONVERT TO VECTOR: " << *AI << "  TYPE = " << *VectorTy <<"\n"; +        DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = " +                     << *VectorTy << '\n');          // Create and insert the vector alloca. -        NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin()); +        NewAI = new AllocaInst(VectorTy, 0, "",  AI->getParent()->begin());          ConvertUsesToScalar(AI, NewAI, 0);        } else { -        DOUT << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"; +        DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");          // Create and insert the integer alloca. -        const Type *NewTy = Context->getIntegerType(AllocaSize*8); +        const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);          NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());          ConvertUsesToScalar(AI, NewAI, 0);        } @@ -328,14 +337,14 @@ bool SROA::performScalarRepl(Function &F) {  /// predicate, do SROA now.  
void SROA::DoScalarReplacement(AllocationInst *AI,                                  std::vector<AllocationInst*> &WorkList) { -  DOUT << "Found inst to SROA: " << *AI; +  DEBUG(errs() << "Found inst to SROA: " << *AI << '\n');    SmallVector<AllocaInst*, 32> ElementAllocas;    if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {      ElementAllocas.reserve(ST->getNumContainedTypes());      for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {        AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,                                         AI->getAlignment(), -                                      AI->getName() + "." + utostr(i), AI); +                                      AI->getName() + "." + Twine(i), AI);        ElementAllocas.push_back(NA);        WorkList.push_back(NA);  // Add to worklist for recursive processing      } @@ -345,7 +354,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI,      const Type *ElTy = AT->getElementType();      for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {        AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), -                                      AI->getName() + "." + utostr(i), AI); +                                      AI->getName() + "." 
+ Twine(i), AI);        ElementAllocas.push_back(NA);        WorkList.push_back(NA);  // Add to worklist for recursive processing      } @@ -371,7 +380,7 @@ void SROA::DoScalarReplacement(AllocationInst *AI,      //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1       // (Also works for arrays instead of structs)      if (LoadInst *LI = dyn_cast<LoadInst>(User)) { -      Value *Insert = Context->getUndef(LI->getType()); +      Value *Insert = UndefValue::get(LI->getType());        for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {          Value *Load = new LoadInst(ElementAllocas[i], "load", LI);          Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI); @@ -418,7 +427,8 @@ void SROA::DoScalarReplacement(AllocationInst *AI,        // expanded itself once the worklist is rerun.        //        SmallVector<Value*, 8> NewArgs; -      NewArgs.push_back(Context->getNullValue(Type::Int32Ty)); +      NewArgs.push_back(Constant::getNullValue( +                                           Type::getInt32Ty(AI->getContext())));        NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());        RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),                                             NewArgs.end(), "", GEPI); @@ -478,7 +488,7 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,          if (Info.isUnsafe) return;          break;        } -      DOUT << "  Transformation preventing inst: " << *User; +      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');        return MarkUnsafe(Info);      case Instruction::Call:        if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { @@ -488,10 +498,10 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocationInst *AI,            break;          }        } -      DOUT << "  Transformation preventing inst: " << *User; +      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');        return 
MarkUnsafe(Info);      default: -      DOUT << "  Transformation preventing inst: " << *User; +      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');        return MarkUnsafe(Info);      }    } @@ -531,7 +541,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocationInst *AI,    // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".    if (I == E || -      I.getOperand() != Context->getNullValue(I.getOperand()->getType())) { +      I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {      return MarkUnsafe(Info);    } @@ -727,6 +737,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,    // that doesn't have anything to do with the alloca that we are promoting. For    // memset, this Value* stays null.    Value *OtherPtr = 0; +  LLVMContext &Context = MI->getContext();    unsigned MemAlignment = MI->getAlignment();    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy      if (BCInst == MTI->getRawDest()) @@ -764,7 +775,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,    const Type *BytePtrTy = MI->getRawDest()->getType();    bool SROADest = MI->getRawDest() == BCInst; -  Constant *Zero = Context->getNullValue(Type::Int32Ty); +  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {      // If this is a memcpy/memmove, emit a GEP of the other element address. 
@@ -772,9 +783,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,      unsigned OtherEltAlign = MemAlignment;      if (OtherPtr) { -      Value *Idx[2] = { Zero, Context->getConstantInt(Type::Int32Ty, i) }; +      Value *Idx[2] = { Zero, +                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };        OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, -                                           OtherPtr->getNameStr()+"."+utostr(i), +                                           OtherPtr->getNameStr()+"."+Twine(i),                                             MI);        uint64_t EltOffset;        const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); @@ -819,7 +831,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,        Constant *StoreVal;        if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {          if (CI->isZero()) { -          StoreVal = Context->getNullValue(EltTy);  // 0.0, null, 0, <0,0> +          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>          } else {            // If EltTy is a vector type, get the element type.            const Type *ValTy = EltTy->getScalarType(); @@ -835,18 +847,18 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,            }            // Convert the integer value to the appropriate type. 
-          StoreVal = Context->getConstantInt(TotalVal); +          StoreVal = ConstantInt::get(Context, TotalVal);            if (isa<PointerType>(ValTy)) -            StoreVal = Context->getConstantExprIntToPtr(StoreVal, ValTy); +            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);            else if (ValTy->isFloatingPoint()) -            StoreVal = Context->getConstantExprBitCast(StoreVal, ValTy); +            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);            assert(StoreVal->getType() == ValTy && "Type mismatch!");            // If the requested value was a vector constant, create it.            if (EltTy != ValTy) {              unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();              SmallVector<Constant*, 16> Elts(NumElts, StoreVal); -            StoreVal = Context->getConstantVector(&Elts[0], NumElts); +            StoreVal = ConstantVector::get(&Elts[0], NumElts);            }          }          new StoreInst(StoreVal, EltPtr, MI); @@ -872,15 +884,16 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,        Value *Ops[] = {          SROADest ? EltPtr : OtherElt,  // Dest ptr          SROADest ? 
OtherElt : EltPtr,  // Src ptr -        Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size -        Context->getConstantInt(Type::Int32Ty, OtherEltAlign)  // Align +        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size +        // Align +        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)        };        CallInst::Create(TheFn, Ops, Ops + 4, "", MI);      } else {        assert(isa<MemSetInst>(MI));        Value *Ops[] = {          EltPtr, MI->getOperand(2),  // Dest, Value, -        Context->getConstantInt(MI->getOperand(3)->getType(), EltSize), // Size +        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size          Zero  // Align        };        CallInst::Create(TheFn, Ops, Ops + 4, "", MI); @@ -910,9 +923,11 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,    // Handle tail padding by extending the operand    if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)      SrcVal = new ZExtInst(SrcVal, -                          Context->getIntegerType(AllocaSizeBits), "", SI); +                          IntegerType::get(SI->getContext(), AllocaSizeBits),  +                          "", SI); -  DOUT << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << *SI; +  DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI +               << '\n');    // There are two forms here: AI could be an array or struct.  Both cases    // have different ways to compute the element offset. 
@@ -929,7 +944,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,        Value *EltVal = SrcVal;        if (Shift) { -        Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift); +        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);          EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,                                              "sroa.store.elt", SI);        } @@ -942,7 +957,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,        if (FieldSizeBits != AllocaSizeBits)          EltVal = new TruncInst(EltVal, -                               Context->getIntegerType(FieldSizeBits), "", SI); +                             IntegerType::get(SI->getContext(), FieldSizeBits), +                              "", SI);        Value *DestField = NewElts[i];        if (EltVal->getType() == FieldTy) {          // Storing to an integer field of this size, just do it. @@ -952,7 +968,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,        } else {          // Otherwise, bitcast the dest pointer (for aggregates).          DestField = new BitCastInst(DestField, -                              Context->getPointerTypeUnqual(EltVal->getType()), +                              PointerType::getUnqual(EltVal->getType()),                                      "", SI);        }        new StoreInst(EltVal, DestField, SI); @@ -977,7 +993,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,        Value *EltVal = SrcVal;        if (Shift) { -        Value *ShiftVal = Context->getConstantInt(EltVal->getType(), Shift); +        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);          EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,                                              "sroa.store.elt", SI);        } @@ -985,7 +1001,8 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,        // Truncate down to an integer of the right size.        
if (ElementSizeBits != AllocaSizeBits)          EltVal = new TruncInst(EltVal,  -                               Context->getIntegerType(ElementSizeBits),"",SI); +                               IntegerType::get(SI->getContext(),  +                                                ElementSizeBits),"",SI);        Value *DestField = NewElts[i];        if (EltVal->getType() == ArrayEltTy) {          // Storing to an integer field of this size, just do it. @@ -995,7 +1012,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI,        } else {          // Otherwise, bitcast the dest pointer (for aggregates).          DestField = new BitCastInst(DestField, -                              Context->getPointerTypeUnqual(EltVal->getType()), +                              PointerType::getUnqual(EltVal->getType()),                                      "", SI);        }        new StoreInst(EltVal, DestField, SI); @@ -1026,7 +1043,8 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,        TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)      return; -  DOUT << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << *LI; +  DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI +               << '\n');    // There are two forms here: AI could be an array or struct.  Both cases    // have different ways to compute the element offset. @@ -1038,9 +1056,9 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,      const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();      ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);    }     -     -  Value *ResultVal = -                 Context->getNullValue(Context->getIntegerType(AllocaSizeBits)); +   +  Value *ResultVal =  +    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {      // Load the value from the alloca.  
If the NewElt is an aggregate, cast @@ -1053,11 +1071,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,      // Ignore zero sized fields like {}, they obviously contain no data.      if (FieldSizeBits == 0) continue; -    const IntegerType *FieldIntTy = Context->getIntegerType(FieldSizeBits); +    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),  +                                                     FieldSizeBits);      if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&          !isa<VectorType>(FieldTy))        SrcField = new BitCastInst(SrcField, -                                 Context->getPointerTypeUnqual(FieldIntTy), +                                 PointerType::getUnqual(FieldIntTy),                                   "", LI);      SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); @@ -1082,7 +1101,7 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocationInst *AI,        Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();      if (Shift) { -      Value *ShiftVal = Context->getConstantInt(SrcField->getType(), Shift); +      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);        SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);      } @@ -1152,7 +1171,8 @@ int SROA::isSafeAllocaToScalarRepl(AllocationInst *AI) {         I != E; ++I) {      isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);      if (Info.isUnsafe) { -      DOUT << "Cannot transform: " << *AI << "  due to user: " << **I; +      DEBUG(errs() << "Cannot transform: " << *AI << "\n  due to user: " +                   << **I << '\n');        return 0;      }    } @@ -1186,24 +1206,25 @@ void SROA::CleanupGEP(GetElementPtrInst *GEPI) {      return;    if (NumElements == 1) { -    GEPI->setOperand(2, Context->getNullValue(Type::Int32Ty)); +    GEPI->setOperand(2,  +                  Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())));      return;    }     
assert(NumElements == 2 && "Unhandled case!");    // All users of the GEP must be loads.  At each use of the GEP, insert    // two loads of the appropriate indexed GEP and select between them. -  Value *IsOne = new ICmpInst(ICmpInst::ICMP_NE, I.getOperand(),  -                              Context->getNullValue(I.getOperand()->getType()), -                              "isone", GEPI); +  Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(),  +                              Constant::getNullValue(I.getOperand()->getType()), +                              "isone");    // Insert the new GEP instructions, which are properly indexed.    SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end()); -  Indices[1] = Context->getNullValue(Type::Int32Ty); +  Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));    Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),                                               Indices.begin(),                                               Indices.end(),                                               GEPI->getName()+".0", GEPI); -  Indices[1] = Context->getConstantInt(Type::Int32Ty, 1); +  Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1);    Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),                                              Indices.begin(),                                              Indices.end(), @@ -1261,9 +1282,9 @@ void SROA::CleanupAllocaUsers(AllocationInst *AI) {  ///      and stores would mutate the memory.  static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,                          unsigned AllocaSize, const TargetData &TD, -                        LLVMContext* Context) { +                        LLVMContext &Context) {    // If this could be contributing to a vector, analyze it. -  if (VecTy != Type::VoidTy) { // either null or a vector type. 
+  if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type.      // If the In type is a vector that is the same size as the alloca, see if it      // matches the existing VecTy. @@ -1276,7 +1297,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,            VecTy = VInTy;          return;        } -    } else if (In == Type::FloatTy || In == Type::DoubleTy || +    } else if (In->isFloatTy() || In->isDoubleTy() ||                 (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&                  isPowerOf2_32(In->getPrimitiveSizeInBits()))) {        // If we're accessing something that could be an element of a vector, see @@ -1289,7 +1310,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,             cast<VectorType>(VecTy)->getElementType()                   ->getPrimitiveSizeInBits()/8 == EltSize)) {          if (VecTy == 0) -          VecTy = Context->getVectorType(In, AllocaSize/EltSize); +          VecTy = VectorType::get(In, AllocaSize/EltSize);          return;        }      } @@ -1297,7 +1318,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,    // Otherwise, we have a case that we can't handle with an optimized vector    // form.  We can still turn this into a large integer. -  VecTy = Type::VoidTy; +  VecTy = Type::getVoidTy(Context);  }  /// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all @@ -1320,7 +1341,8 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,        // Don't break volatile loads.        
if (LI->isVolatile())          return false; -      MergeInType(LI->getType(), Offset, VecTy, AllocaSize, *TD, Context); +      MergeInType(LI->getType(), Offset, VecTy, +                  AllocaSize, *TD, V->getContext());        SawVec |= isa<VectorType>(LI->getType());        continue;      } @@ -1329,7 +1351,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,        // Storing the pointer, not into the value?        if (SI->getOperand(0) == V || SI->isVolatile()) return 0;        MergeInType(SI->getOperand(0)->getType(), Offset, -                  VecTy, AllocaSize, *TD, Context); +                  VecTy, AllocaSize, *TD, V->getContext());        SawVec |= isa<VectorType>(SI->getOperand(0)->getType());        continue;      } @@ -1433,7 +1455,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {        assert(SI->getOperand(0) != Ptr && "Consistency error!"); -      Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); +      // FIXME: Remove once builder has Twine API. +      Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());        Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,                                               Builder);        Builder.CreateStore(New, NewAI); @@ -1457,8 +1480,10 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {            for (unsigned i = 1; i != NumBytes; ++i)              APVal |= APVal << 8; -        Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").c_str()); -        Value *New = ConvertScalar_InsertValue(Context->getConstantInt(APVal), +        // FIXME: Remove once builder has Twine API. 
+        Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str()); +        Value *New = ConvertScalar_InsertValue( +                                    ConstantInt::get(User->getContext(), APVal),                                                 Old, Offset, Builder);          Builder.CreateStore(New, NewAI);        } @@ -1510,8 +1535,7 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {        continue;      } -    assert(0 && "Unsupported operation!"); -    abort(); +    llvm_unreachable("Unsupported operation!");    }  } @@ -1545,9 +1569,8 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,        assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");      }      // Return the element extracted out of it. -    Value *V = Builder.CreateExtractElement(FromVal, -                                    Context->getConstantInt(Type::Int32Ty,Elt), -                                            "tmp"); +    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get( +                    Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");      if (V->getType() != ToType)        V = Builder.CreateBitCast(V, ToType, "tmp");      return V; @@ -1557,7 +1580,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,    // use insertvalue's to form the FCA.    
if (const StructType *ST = dyn_cast<StructType>(ToType)) {      const StructLayout &Layout = *TD->getStructLayout(ST); -    Value *Res = Context->getUndef(ST); +    Value *Res = UndefValue::get(ST);      for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {        Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),                                          Offset+Layout.getElementOffsetInBits(i), @@ -1569,7 +1592,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,    if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {      uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType()); -    Value *Res = Context->getUndef(AT); +    Value *Res = UndefValue::get(AT);      for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {        Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),                                                Offset+i*EltSize, Builder); @@ -1599,21 +1622,23 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,    // only some bits are used.    if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())      FromVal = Builder.CreateLShr(FromVal, -                                 Context->getConstantInt(FromVal->getType(), +                                 ConstantInt::get(FromVal->getType(),                                                             ShAmt), "tmp");    else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())      FromVal = Builder.CreateShl(FromVal,  -                                Context->getConstantInt(FromVal->getType(), +                                ConstantInt::get(FromVal->getType(),                                                            -ShAmt), "tmp");    // Finally, unconditionally truncate the integer to the right width.    
unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);    if (LIBitWidth < NTy->getBitWidth())      FromVal = -      Builder.CreateTrunc(FromVal, Context->getIntegerType(LIBitWidth), "tmp"); +      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),  +                                                    LIBitWidth), "tmp");    else if (LIBitWidth > NTy->getBitWidth())      FromVal = -       Builder.CreateZExt(FromVal, Context->getIntegerType(LIBitWidth), "tmp"); +       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),  +                                                    LIBitWidth), "tmp");    // If the result is an integer, this is a trunc or bitcast.    if (isa<IntegerType>(ToType)) { @@ -1645,6 +1670,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,    // Convert the stored type to the actual type, shift it left to insert    // then 'or' into place.    const Type *AllocaType = Old->getType(); +  LLVMContext &Context = Old->getContext();    if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {      uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy); @@ -1664,7 +1690,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,        SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");      SV = Builder.CreateInsertElement(Old, SV,  -                                   Context->getConstantInt(Type::Int32Ty, Elt), +                     ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),                                       "tmp");      return SV;    } @@ -1697,9 +1723,10 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,    unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());    unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);    if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType())) -    SV = Builder.CreateBitCast(SV, Context->getIntegerType(SrcWidth), "tmp"); +    SV = Builder.CreateBitCast(SV, +                         
   IntegerType::get(SV->getContext(),SrcWidth), "tmp");    else if (isa<PointerType>(SV->getType())) -    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(), "tmp"); +    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");    // Zero extend or truncate the value if needed.    if (SV->getType() != AllocaType) { @@ -1732,11 +1759,11 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,    // only some bits in the structure are set.    APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));    if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) { -    SV = Builder.CreateShl(SV, Context->getConstantInt(SV->getType(), +    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),                             ShAmt), "tmp");      Mask <<= ShAmt;    } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) { -    SV = Builder.CreateLShr(SV, Context->getConstantInt(SV->getType(), +    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),                              -ShAmt), "tmp");      Mask = Mask.lshr(-ShAmt);    } @@ -1745,7 +1772,7 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,    // in the new bits.    
if (SrcWidth != DestWidth) {      assert(DestWidth > SrcWidth); -    Old = Builder.CreateAnd(Old, Context->getConstantInt(~Mask), "mask"); +    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");      SV = Builder.CreateOr(Old, SV, "ins");    }    return SV; diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index b8bce801a1fb..29712b3c13de 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -30,7 +30,6 @@  #include "llvm/Module.h"  #include "llvm/Attributes.h"  #include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Pass.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/SmallPtrSet.h" @@ -40,7 +39,7 @@ using namespace llvm;  STATISTIC(NumSimpl, "Number of blocks simplified");  namespace { -  struct VISIBILITY_HIDDEN CFGSimplifyPass : public FunctionPass { +  struct CFGSimplifyPass : public FunctionPass {      static char ID; // Pass identification, replacement for typeid      CFGSimplifyPass() : FunctionPass(&ID) {} @@ -58,20 +57,20 @@ FunctionPass *llvm::createCFGSimplificationPass() {  /// ChangeToUnreachable - Insert an unreachable instruction before the specified  /// instruction, making it and the rest of the code in the block dead. -static void ChangeToUnreachable(Instruction *I, LLVMContext* Context) { +static void ChangeToUnreachable(Instruction *I, LLVMContext &Context) {    BasicBlock *BB = I->getParent();    // Loop over all of the successors, removing BB's entry from any PHI    // nodes.    for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)      (*SI)->removePredecessor(BB); -  new UnreachableInst(I); +  new UnreachableInst(I->getContext(), I);    // All instructions after this are dead.    
BasicBlock::iterator BBI = I, BBE = BB->end();    while (BBI != BBE) {      if (!BBI->use_empty()) -      BBI->replaceAllUsesWith(Context->getUndef(BBI->getType())); +      BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));      BB->getInstList().erase(BBI++);    }  } @@ -97,7 +96,7 @@ static void ChangeToCall(InvokeInst *II) {  static bool MarkAliveBlocks(BasicBlock *BB,                              SmallPtrSet<BasicBlock*, 128> &Reachable, -                            LLVMContext* Context) { +                            LLVMContext &Context) {    SmallVector<BasicBlock*, 128> Worklist;    Worklist.push_back(BB); @@ -132,7 +131,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,          if (isa<UndefValue>(Ptr) ||              (isa<ConstantPointerNull>(Ptr) && -             cast<PointerType>(Ptr->getType())->getAddressSpace() == 0)) { +             SI->getPointerAddressSpace() == 0)) {            ChangeToUnreachable(SI, Context);            Changed = true;            break; diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp index 4aad17d7236d..13077fe642a7 100644 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp @@ -22,15 +22,13 @@  #include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Target/TargetData.h"  #include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" -#include "llvm/Config/config.h"  using namespace llvm;  namespace {    /// This pass optimizes well half_powr function calls.    
/// -  class VISIBILITY_HIDDEN SimplifyHalfPowrLibCalls : public FunctionPass { +  class SimplifyHalfPowrLibCalls : public FunctionPass {      const TargetData *TD;    public:      static char ID; // Pass identification @@ -39,7 +37,6 @@ namespace {      bool runOnFunction(Function &F);      virtual void getAnalysisUsage(AnalysisUsage &AU) const { -      AU.addRequired<TargetData>();      }      Instruction * @@ -60,8 +57,9 @@ FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {  /// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging  /// their control flow to better facilitate subsequent optimization.  Instruction * -SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, -                                        Instruction *InsertPt) { +SimplifyHalfPowrLibCalls:: +InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs, +                Instruction *InsertPt) {    std::vector<BasicBlock *> Bodies;    BasicBlock *NewBlock = 0; @@ -123,7 +121,7 @@ SimplifyHalfPowrLibCalls::InlineHalfPowrs(const std::vector<Instruction *> &Half  /// runOnFunction - Top level algorithm.  ///  bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { -  TD = &getAnalysis<TargetData>(); +  TD = getAnalysisIfAvailable<TargetData>();    bool Changed = false;    std::vector<Instruction *> HalfPowrs; @@ -136,8 +134,7 @@ bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) {          Function *Callee = CI->getCalledFunction();          if (Callee && Callee->hasExternalLinkage()) {            // Look for calls with well-known names. 
-          const char *CalleeName = Callee->getNameStart(); -          if (strcmp(CalleeName, "__half_powrf4") == 0) +          if (Callee->getName() == "__half_powrf4")              IsHalfPowr = true;          }        } diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index ec48469f536e..e186601505c2 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -9,11 +9,9 @@  //  // This file implements a simple pass that applies a variety of small  // optimizations for calls to specific well-known function calls (e.g. runtime -// library functions). For example, a call to the function "exit(3)" that -// occurs within the main() function can be transformed into a simple "return 3" -// instruction. Any optimization that takes this form (replace call to library -// function with simpler code that provides the same result) belongs in this -// file. +// library functions).   Any optimization that takes the very simple form +// "replace call to library function with simpler code that provides the same +// result" belongs in this file.  //  //===----------------------------------------------------------------------===// @@ -29,8 +27,9 @@  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/StringMap.h"  #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" +#include "llvm/ADT/STLExtras.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Config/config.h"  using namespace llvm; @@ -44,7 +43,7 @@ STATISTIC(NumAnnotated, "Number of attributes added to library functions");  /// This class is the abstract base class for the set of optimizations that  /// corresponds to one library call.  namespace { -class VISIBILITY_HIDDEN LibCallOptimization { +class LibCallOptimization {  protected:    Function *Caller;    const TargetData *TD; @@ -58,14 +57,14 @@ public:    /// performed.  
If it returns CI, then it transformed the call and CI is to be    /// deleted.  If it returns something else, replace CI with the new value and    /// delete CI. -  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)  +  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)      =0; -   -  Value *OptimizeCall(CallInst *CI, const TargetData &TD, IRBuilder<> &B) { + +  Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) {      Caller = CI->getParent()->getParent(); -    this->TD = &TD; +    this->TD = TD;      if (CI->getCalledFunction()) -      Context = CI->getCalledFunction()->getContext(); +      Context = &CI->getCalledFunction()->getContext();      return CallOptimizer(CI->getCalledFunction(), CI, B);    } @@ -76,12 +75,12 @@ public:    /// specified pointer.  Ptr is required to be some pointer type, and the    /// return value has 'intptr_t' type.    Value *EmitStrLen(Value *Ptr, IRBuilder<> &B); -   +    /// EmitMemCpy - Emit a call to the memcpy function to the builder.  This    /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. -  Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,  +  Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len,                      unsigned Align, IRBuilder<> &B); -   +    /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is    /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.    Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B); @@ -96,35 +95,36 @@ public:    /// 'floor').  This function is known to take a single of type matching 'Op'    /// and returns one value with the same type.  If 'Op' is a long double, 'l'    /// is added as the suffix of name, if 'Op' is a float, we add a 'f' suffix. 
-  Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B); -   +  Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B, +                              const AttrListPtr &Attrs); +    /// EmitPutChar - Emit a call to the putchar function.  This assumes that Char    /// is an integer.    void EmitPutChar(Value *Char, IRBuilder<> &B); -   +    /// EmitPutS - Emit a call to the puts function.  This assumes that Str is    /// some pointer.    void EmitPutS(Value *Str, IRBuilder<> &B); -     +    /// EmitFPutC - Emit a call to the fputc function.  This assumes that Char is    /// an i32, and File is a pointer to FILE.    void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B); -   +    /// EmitFPutS - Emit a call to the puts function.  Str is required to be a    /// pointer and File is a pointer to FILE.    void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B); -   +    /// EmitFWrite - Emit a call to the fwrite function.  This assumes that Ptr is    /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.    void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B); -   +  };  } // End anonymous namespace.  /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.  
Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) {    return -        B.CreateBitCast(V, Context->getPointerTypeUnqual(Type::Int8Ty), "cstr"); +        B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr");  }  /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -137,8 +137,8 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {                                     Attribute::NoUnwind);    Constant *StrLen =M->getOrInsertFunction("strlen", AttrListPtr::get(AWI, 2), -                                           TD->getIntPtrType(), -                                    Context->getPointerTypeUnqual(Type::Int8Ty), +                                           TD->getIntPtrType(*Context), +					   Type::getInt8PtrTy(*Context),                                             NULL);    CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");    if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts())) @@ -157,7 +157,7 @@ Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,    Tys[0] = Len->getType();    Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1);    return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len, -                       Context->getConstantInt(Type::Int32Ty, Align)); +                       ConstantInt::get(Type::getInt32Ty(*Context), Align));  }  /// EmitMemChr - Emit a call to the memchr function.  
This assumes that Ptr is @@ -169,9 +169,10 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,    AWI = AttributeWithIndex::get(~0u, Attribute::ReadOnly | Attribute::NoUnwind);    Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(&AWI, 1), -                                    Context->getPointerTypeUnqual(Type::Int8Ty), -                                    Context->getPointerTypeUnqual(Type::Int8Ty), -                                         Type::Int32Ty, TD->getIntPtrType(), +					 Type::getInt8PtrTy(*Context), +					 Type::getInt8PtrTy(*Context), +                                         Type::getInt32Ty(*Context), +					 TD->getIntPtrType(*Context),                                           NULL);    CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr"); @@ -192,10 +193,10 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,                                     Attribute::NoUnwind);    Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI, 3), -                                         Type::Int32Ty, -                                    Context->getPointerTypeUnqual(Type::Int8Ty), -                                    Context->getPointerTypeUnqual(Type::Int8Ty), -                                         TD->getIntPtrType(), NULL); +                                         Type::getInt32Ty(*Context), +                                    Type::getInt8PtrTy(*Context), +                                    Type::getInt8PtrTy(*Context), +                                         TD->getIntPtrType(*Context), NULL);    CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),                                 Len, "memcmp"); @@ -213,7 +214,7 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val,   const Type *Tys[1];   Tys[0] = Len->getType();   Value *MemSet = Intrinsic::getDeclaration(M, IID, Tys, 1); - Value *Align = Context->getConstantInt(Type::Int32Ty, 
1); + Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1);   return B.CreateCall4(MemSet, CastToCStr(Dst, B), Val, Len, Align);  } @@ -222,14 +223,15 @@ Value *LibCallOptimization::EmitMemSet(Value *Dst, Value *Val,  /// returns one value with the same type.  If 'Op' is a long double, 'l' is  /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.  Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name, -                                                 IRBuilder<> &B) { +                                                 IRBuilder<> &B, +                                                 const AttrListPtr &Attrs) {    char NameBuffer[20]; -  if (Op->getType() != Type::DoubleTy) { +  if (!Op->getType()->isDoubleTy()) {      // If we need to add a suffix, copy into NameBuffer.      unsigned NameLen = strlen(Name);      assert(NameLen < sizeof(NameBuffer)-2);      memcpy(NameBuffer, Name, NameLen); -    if (Op->getType() == Type::FloatTy) +    if (Op->getType()->isFloatTy())        NameBuffer[NameLen] = 'f';  // floorf      else        NameBuffer[NameLen] = 'l';  // floorl @@ -241,7 +243,7 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,    Value *Callee = M->getOrInsertFunction(Name, Op->getType(),                                           Op->getType(), NULL);    CallInst *CI = B.CreateCall(Callee, Op, Name); - +  CI->setAttributes(Attrs);    if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))      CI->setCallingConv(F->getCallingConv()); @@ -252,10 +254,12 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,  /// is an integer.  
void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {    Module *M = Caller->getParent(); -  Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty, -                                          Type::Int32Ty, NULL); +  Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context), +                                          Type::getInt32Ty(*Context), NULL);    CallInst *CI = B.CreateCall(PutChar, -                              B.CreateIntCast(Char, Type::Int32Ty, "chari"), +                              B.CreateIntCast(Char, +					      Type::getInt32Ty(*Context), +					      "chari"),                                "putchar");    if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts())) @@ -271,8 +275,8 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) {    AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);    Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2), -                                       Type::Int32Ty, -                                    Context->getPointerTypeUnqual(Type::Int8Ty), +                                       Type::getInt32Ty(*Context), +                                    Type::getInt8PtrTy(*Context),                                         NULL);    CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");    if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts())) @@ -289,12 +293,16 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {    AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);    Constant *F;    if (isa<PointerType>(File->getType())) -    F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty, -                               Type::Int32Ty, File->getType(), NULL); +    F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), +			       Type::getInt32Ty(*Context), +                               Type::getInt32Ty(*Context), File->getType(), +			    
   NULL);    else -    F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty, +    F = M->getOrInsertFunction("fputc", +			       Type::getInt32Ty(*Context), +			       Type::getInt32Ty(*Context),                                 File->getType(), NULL); -  Char = B.CreateIntCast(Char, Type::Int32Ty, "chari"); +  Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari");    CallInst *CI = B.CreateCall2(F, Char, File, "fputc");    if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) @@ -311,12 +319,13 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {    AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);    Constant *F;    if (isa<PointerType>(File->getType())) -    F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), Type::Int32Ty, -                               Context->getPointerTypeUnqual(Type::Int8Ty), +    F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3), +			       Type::getInt32Ty(*Context), +                               Type::getInt8PtrTy(*Context),                                 File->getType(), NULL);    else -    F = M->getOrInsertFunction("fputs", Type::Int32Ty, -                               Context->getPointerTypeUnqual(Type::Int8Ty), +    F = M->getOrInsertFunction("fputs", Type::getInt32Ty(*Context), +                               Type::getInt8PtrTy(*Context),                                 File->getType(), NULL);    CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs"); @@ -336,17 +345,19 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,    Constant *F;    if (isa<PointerType>(File->getType()))      F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3), -                               TD->getIntPtrType(), -                               Context->getPointerTypeUnqual(Type::Int8Ty), -                               TD->getIntPtrType(), TD->getIntPtrType(), +                               
TD->getIntPtrType(*Context), +                               Type::getInt8PtrTy(*Context), +                               TD->getIntPtrType(*Context), +			       TD->getIntPtrType(*Context),                                 File->getType(), NULL);    else -    F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(), -                               Context->getPointerTypeUnqual(Type::Int8Ty), -                               TD->getIntPtrType(), TD->getIntPtrType(), +    F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(*Context), +                               Type::getInt8PtrTy(*Context), +                               TD->getIntPtrType(*Context), +			       TD->getIntPtrType(*Context),                                 File->getType(), NULL);    CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size, -                        Context->getConstantInt(TD->getIntPtrType(), 1), File); +                        ConstantInt::get(TD->getIntPtrType(*Context), 1), File);    if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))      CI->setCallingConv(Fn->getCallingConv()); @@ -362,30 +373,30 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {    // Look through noop bitcast instructions.    if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))      return GetStringLengthH(BCI->getOperand(0), PHIs); -   +    // If this is a PHI node, there are two cases: either we have already seen it    // or we haven't.    if (PHINode *PN = dyn_cast<PHINode>(V)) {      if (!PHIs.insert(PN))        return ~0ULL;  // already in the set. -     +      // If it was new, see if all the input strings are the same length.      uint64_t LenSoFar = ~0ULL;      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {        uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);        if (Len == 0) return 0; // Unknown length -> unknown. 
-       +        if (Len == ~0ULL) continue; -       +        if (Len != LenSoFar && LenSoFar != ~0ULL)          return 0;    // Disagree -> unknown.        LenSoFar = Len;      } -     +      // Success, all agree.      return LenSoFar;    } -   +    // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)    if (SelectInst *SI = dyn_cast<SelectInst>(V)) {      uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs); @@ -397,7 +408,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {      if (Len1 != Len2) return 0;      return Len1;    } -   +    // If the value is not a GEP instruction nor a constant expression with a    // GEP instruction, then return unknown.    User *GEP = 0; @@ -410,11 +421,11 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {    } else {      return 0;    } -   +    // Make sure the GEP has exactly three arguments.    if (GEP->getNumOperands() != 3)      return 0; -   +    // Check to make sure that the first operand of the GEP is an integer and    // has value 0 so that we are sure we're indexing into the initializer.    if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) { @@ -422,7 +433,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {        return 0;    } else      return 0; -   +    // If the second index isn't a ConstantInt, then this is a variable index    // into the array.  If this occurs, we can't say anything meaningful about    // the string. @@ -431,28 +442,30 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {      StartIdx = CI->getZExtValue();    else      return 0; -   +    // The GEP instruction, constant or instruction, must reference a global    // variable that is a constant and is initialized. The referenced constant    // initializer is the array that we'll use for optimization.    
GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); -  if (!GV || !GV->isConstant() || !GV->hasInitializer()) +  if (!GV || !GV->isConstant() || !GV->hasInitializer() || +      GV->mayBeOverridden())      return 0;    Constant *GlobalInit = GV->getInitializer(); -   +    // Handle the ConstantAggregateZero case, which is a degenerate case. The    // initializer is constant zero so the length of the string must be zero.    if (isa<ConstantAggregateZero>(GlobalInit))      return 1;  // Len = 0 offset by 1. -   +    // Must be a Constant Array    ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit); -  if (!Array || Array->getType()->getElementType() != Type::Int8Ty) +  if (!Array || +      Array->getType()->getElementType() != Type::getInt8Ty(V->getContext()))      return false; -   +    // Get the number of elements in the array    uint64_t NumElts = Array->getType()->getNumElements(); -   +    // Traverse the constant array from StartIdx (derived above) which is    // the place the GEP refers to in the array.    for (unsigned i = StartIdx; i != NumElts; ++i) { @@ -463,7 +476,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {      if (CI->isZero())        return i-StartIdx+1; // We found end of string, success!    } -   +    return 0; // The array isn't null terminated, conservatively return 'unknown'.  } @@ -471,7 +484,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {  /// the specified pointer, return 'len+1'.  If we can't, return 0.  static uint64_t GetStringLength(Value *V) {    if (!isa<PointerType>(V->getType())) return 0; -   +    SmallPtrSet<PHINode*, 32> PHIs;    uint64_t Len = GetStringLengthH(V, PHIs);    // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return @@ -480,7 +493,7 @@ static uint64_t GetStringLength(Value *V) {  }  /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the -/// value is equal or not-equal to zero.  
+/// value is equal or not-equal to zero.  static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {    for (Value::use_iterator UI = V->use_begin(), E = V->use_end();         UI != E; ++UI) { @@ -496,73 +509,38 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {  }  //===----------------------------------------------------------------------===// -// Miscellaneous LibCall Optimizations -//===----------------------------------------------------------------------===// - -namespace { -//===---------------------------------------===// -// 'exit' Optimizations - -/// ExitOpt - int main() { exit(4); } --> int main() { return 4; } -struct VISIBILITY_HIDDEN ExitOpt : public LibCallOptimization { -  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { -    // Verify we have a reasonable prototype for exit. -    if (Callee->arg_size() == 0 || !CI->use_empty()) -      return 0; - -    // Verify the caller is main, and that the result type of main matches the -    // argument type of exit. -    if (!Caller->isName("main") || !Caller->hasExternalLinkage() || -        Caller->getReturnType() != CI->getOperand(1)->getType()) -      return 0; - -    TerminatorInst *OldTI = CI->getParent()->getTerminator(); -     -    // Create the return after the call. -    ReturnInst *RI = B.CreateRet(CI->getOperand(1)); - -    // Drop all successor phi node entries. -    for (unsigned i = 0, e = OldTI->getNumSuccessors(); i != e; ++i) -      OldTI->getSuccessor(i)->removePredecessor(CI->getParent()); -     -    // Erase all instructions from after our return instruction until the end of -    // the block. 
-    BasicBlock::iterator FirstDead = RI; ++FirstDead; -    CI->getParent()->getInstList().erase(FirstDead, CI->getParent()->end()); -    return CI; -  } -}; - -//===----------------------------------------------------------------------===//  // String and Memory LibCall Optimizations  //===----------------------------------------------------------------------===//  //===---------------------------------------===//  // 'strcat' Optimizations - -struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization { +namespace { +struct StrCatOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      // Verify the "strcat" function prototype.      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 2 || -        FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) || +        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||          FT->getParamType(0) != FT->getReturnType() ||          FT->getParamType(1) != FT->getReturnType())        return 0; -     +      // Extract some information from the instruction      Value *Dst = CI->getOperand(1);      Value *Src = CI->getOperand(2); -     +      // See if we can get the length of the input string.      uint64_t Len = GetStringLength(Src);      if (Len == 0) return 0;      --Len;  // Unbias length. -     +      // Handle the simple, do-nothing case: strcat(x, "") -> x      if (Len == 0)        return Dst; -     + +    // These optimizations require TargetData. +    if (!TD) return 0; +      EmitStrLenMemCpy(Src, Dst, Len, B);      return Dst;    } @@ -571,28 +549,28 @@ struct VISIBILITY_HIDDEN StrCatOpt : public LibCallOptimization {      // We need to find the end of the destination string.  That's where the      // memory is to be moved to. We just generate a call to strlen.      
Value *DstLen = EmitStrLen(Dst, B); -     +      // Now that we have the destination's length, we must index into the      // destination's pointer to get the actual memcpy destination (end of      // the string .. we're concatenating).      Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr"); -     +      // We have enough information to now generate the memcpy call to do the      // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.      EmitMemCpy(CpyDst, Src, -               Context->getConstantInt(TD->getIntPtrType(), Len+1), 1, B); +               ConstantInt::get(TD->getIntPtrType(*Context), Len+1), 1, B);    }  };  //===---------------------------------------===//  // 'strncat' Optimizations -struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt { +struct StrNCatOpt : public StrCatOpt {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      // Verify the "strncat" function prototype.      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 3 || -        FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) || +        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||          FT->getParamType(0) != FT->getReturnType() ||          FT->getParamType(1) != FT->getReturnType() ||          !isa<IntegerType>(FT->getParamType(2))) @@ -619,6 +597,9 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {      // strncat(x,  c, 0) -> x      if (SrcLen == 0 || Len == 0) return Dst; +    // These optimizations require TargetData. 
+    if (!TD) return 0; +      // We don't optimize this case      if (Len < SrcLen) return 0; @@ -632,27 +613,31 @@ struct VISIBILITY_HIDDEN StrNCatOpt : public StrCatOpt {  //===---------------------------------------===//  // 'strchr' Optimizations -struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization { +struct StrChrOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      // Verify the "strchr" function prototype.      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 2 || -        FT->getReturnType() != Context->getPointerTypeUnqual(Type::Int8Ty) || +        FT->getReturnType() != Type::getInt8PtrTy(*Context) ||          FT->getParamType(0) != FT->getReturnType())        return 0; -     +      Value *SrcStr = CI->getOperand(1); -     +      // If the second operand is non-constant, see if we can compute the length      // of the input string and turn this into memchr.      ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));      if (CharC == 0) { +      // These optimizations require TargetData. +      if (!TD) return 0; +        uint64_t Len = GetStringLength(SrcStr); -      if (Len == 0 || FT->getParamType(1) != Type::Int32Ty) // memchr needs i32. +      if (Len == 0 || +          FT->getParamType(1) != Type::getInt32Ty(*Context)) // memchr needs i32.          return 0; -       +        return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. -                        Context->getConstantInt(TD->getIntPtrType(), Len), B); +                        ConstantInt::get(TD->getIntPtrType(*Context), Len), B);      }      // Otherwise, the character is a constant, see if the first argument is @@ -660,24 +645,24 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {      std::string Str;      if (!GetConstantStringInfo(SrcStr, Str))        return 0; -     +      // strchr can find the nul character.      
Str += '\0';      char CharValue = CharC->getSExtValue(); -     +      // Compute the offset.      uint64_t i = 0;      while (1) {        if (i == Str.size())    // Didn't find the char.  strchr returns null. -        return Context->getNullValue(CI->getType()); +        return Constant::getNullValue(CI->getType());        // Did we find our match?        if (Str[i] == CharValue)          break;        ++i;      } -     +      // strchr(s+n,c)  -> gep(s+n+i,c) -    Value *Idx = Context->getConstantInt(Type::Int64Ty, i); +    Value *Idx = ConstantInt::get(Type::getInt64Ty(*Context), i);      return B.CreateGEP(SrcStr, Idx, "strchr");    }  }; @@ -685,40 +670,44 @@ struct VISIBILITY_HIDDEN StrChrOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'strcmp' Optimizations -struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization { +struct StrCmpOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      // Verify the "strcmp" function prototype.      
const FunctionType *FT = Callee->getFunctionType(); -    if (FT->getNumParams() != 2 || FT->getReturnType() != Type::Int32Ty || +    if (FT->getNumParams() != 2 || +	FT->getReturnType() != Type::getInt32Ty(*Context) ||          FT->getParamType(0) != FT->getParamType(1) || -        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty)) +        FT->getParamType(0) != Type::getInt8PtrTy(*Context))        return 0; -     +      Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);      if (Str1P == Str2P)      // strcmp(x,x)  -> 0 -      return Context->getConstantInt(CI->getType(), 0); -     +      return ConstantInt::get(CI->getType(), 0); +      std::string Str1, Str2;      bool HasStr1 = GetConstantStringInfo(Str1P, Str1);      bool HasStr2 = GetConstantStringInfo(Str2P, Str2); -     +      if (HasStr1 && Str1.empty()) // strcmp("", x) -> *x        return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()); -     +      if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x        return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); -     +      // strcmp(x, y)  -> cnst  (if both x and y are constant strings)      if (HasStr1 && HasStr2) -      return Context->getConstantInt(CI->getType(),  +      return ConstantInt::get(CI->getType(),                                       strcmp(Str1.c_str(),Str2.c_str()));      // strcmp(P, "x") -> memcmp(P, "x", 2)      uint64_t Len1 = GetStringLength(Str1P);      uint64_t Len2 = GetStringLength(Str2P);      if (Len1 && Len2) { +      // These optimizations require TargetData. 
+      if (!TD) return 0; +        return EmitMemCmp(Str1P, Str2P, -                        Context->getConstantInt(TD->getIntPtrType(), +                        ConstantInt::get(TD->getIntPtrType(*Context),                          std::min(Len1, Len2)), B);      } @@ -729,43 +718,44 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'strncmp' Optimizations -struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization { +struct StrNCmpOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      // Verify the "strncmp" function prototype.      const FunctionType *FT = Callee->getFunctionType(); -    if (FT->getNumParams() != 3 || FT->getReturnType() != Type::Int32Ty || +    if (FT->getNumParams() != 3 || +	FT->getReturnType() != Type::getInt32Ty(*Context) ||          FT->getParamType(0) != FT->getParamType(1) || -        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) || +        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||          !isa<IntegerType>(FT->getParamType(2)))        return 0; -     +      Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);      if (Str1P == Str2P)      // strncmp(x,x,n)  -> 0 -      return Context->getConstantInt(CI->getType(), 0); -     +      return ConstantInt::get(CI->getType(), 0); +      // Get the length argument if it is constant.      
uint64_t Length;      if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))        Length = LengthArg->getZExtValue();      else        return 0; -     +      if (Length == 0) // strncmp(x,y,0)   -> 0 -      return Context->getConstantInt(CI->getType(), 0); -     +      return ConstantInt::get(CI->getType(), 0); +      std::string Str1, Str2;      bool HasStr1 = GetConstantStringInfo(Str1P, Str1);      bool HasStr2 = GetConstantStringInfo(Str2P, Str2); -     +      if (HasStr1 && Str1.empty())  // strncmp("", x, n) -> *x        return B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"), CI->getType()); -     +      if (HasStr2 && Str2.empty())  // strncmp(x, "", n) -> *x        return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); -     +      // strncmp(x, y)  -> cnst  (if both x and y are constant strings)      if (HasStr1 && HasStr2) -      return Context->getConstantInt(CI->getType(), +      return ConstantInt::get(CI->getType(),                                strncmp(Str1.c_str(), Str2.c_str(), Length));      return 0;    } @@ -775,27 +765,30 @@ struct VISIBILITY_HIDDEN StrNCmpOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'strcpy' Optimizations -struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization { +struct StrCpyOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      // Verify the "strcpy" function prototype.      
const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||          FT->getParamType(0) != FT->getParamType(1) || -        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty)) +        FT->getParamType(0) != Type::getInt8PtrTy(*Context))        return 0; -     +      Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);      if (Dst == Src)      // strcpy(x,x)  -> x        return Src; -     + +    // These optimizations require TargetData. +    if (!TD) return 0; +      // See if we can get the length of the input string.      uint64_t Len = GetStringLength(Src);      if (Len == 0) return 0; -     +      // We have enough information to now generate the memcpy call to do the      // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.      EmitMemCpy(Dst, Src, -               Context->getConstantInt(TD->getIntPtrType(), Len), 1, B); +               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);      return Dst;    }  }; @@ -803,12 +796,12 @@ struct VISIBILITY_HIDDEN StrCpyOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'strncpy' Optimizations -struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization { +struct StrNCpyOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||          FT->getParamType(0) != FT->getParamType(1) || -        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) || +        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||          !isa<IntegerType>(FT->getParamType(2)))        return 0; @@ -823,7 +816,8 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {      if (SrcLen == 0) {        // strncpy(x, "", y) -> memset(x, '\0', y, 1) -    
  EmitMemSet(Dst, Context->getConstantInt(Type::Int8Ty, '\0'), LenOp, B); +      EmitMemSet(Dst, ConstantInt::get(Type::getInt8Ty(*Context), '\0'), LenOp, +		 B);        return Dst;      } @@ -835,12 +829,15 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {      if (Len == 0) return Dst; // strncpy(x, y, 0) -> x +    // These optimizations require TargetData. +    if (!TD) return 0; +      // Let strncpy handle the zero padding      if (Len > SrcLen+1) return 0;      // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]      EmitMemCpy(Dst, Src, -               Context->getConstantInt(TD->getIntPtrType(), Len), 1, B); +               ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B);      return Dst;    } @@ -849,19 +846,19 @@ struct VISIBILITY_HIDDEN StrNCpyOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'strlen' Optimizations -struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization { +struct StrLenOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 1 || -        FT->getParamType(0) != Context->getPointerTypeUnqual(Type::Int8Ty) || +        FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||          !isa<IntegerType>(FT->getReturnType()))        return 0; -     +      Value *Src = CI->getOperand(1);      // Constant folding: strlen("xyz") -> 3      if (uint64_t Len = GetStringLength(Src)) -      return Context->getConstantInt(CI->getType(), Len-1); +      return ConstantInt::get(CI->getType(), Len-1);      // Handle strlen(p) != 0.      
if (!IsOnlyUsedInZeroEqualityComparison(CI)) return 0; @@ -875,7 +872,7 @@ struct VISIBILITY_HIDDEN StrLenOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'strto*' Optimizations -struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization { +struct StrToOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || @@ -897,18 +894,18 @@ struct VISIBILITY_HIDDEN StrToOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'memcmp' Optimizations -struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization { +struct MemCmpOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||          !isa<PointerType>(FT->getParamType(1)) || -        FT->getReturnType() != Type::Int32Ty) +        FT->getReturnType() != Type::getInt32Ty(*Context))        return 0;      Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);      if (LHS == RHS)  // memcmp(s,s,x) -> 0 -      return Context->getNullValue(CI->getType()); +      return Constant::getNullValue(CI->getType());      // Make sure we have a constant length.      
ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3)); @@ -916,7 +913,7 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {      uint64_t Len = LenC->getZExtValue();      if (Len == 0) // memcmp(s1,s2,0) -> 0 -      return Context->getNullValue(CI->getType()); +      return Constant::getNullValue(CI->getType());      if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS        Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv"); @@ -927,8 +924,8 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {      // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS)  != 0      // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS)  != 0      if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) { -      const Type *PTy = Context->getPointerTypeUnqual(Len == 2 ? -                                               Type::Int16Ty : Type::Int32Ty); +      const Type *PTy = PointerType::getUnqual(Len == 2 ? +                       Type::getInt16Ty(*Context) : Type::getInt32Ty(*Context));        LHS = B.CreateBitCast(LHS, PTy, "tmp");        RHS = B.CreateBitCast(RHS, PTy, "tmp");        LoadInst *LHSV = B.CreateLoad(LHS, "lhsv"); @@ -944,13 +941,16 @@ struct VISIBILITY_HIDDEN MemCmpOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'memcpy' Optimizations -struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization { +struct MemCpyOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { +    // These optimizations require TargetData. 
+    if (!TD) return 0; +      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||          !isa<PointerType>(FT->getParamType(0)) ||          !isa<PointerType>(FT->getParamType(1)) || -        FT->getParamType(2) != TD->getIntPtrType()) +        FT->getParamType(2) != TD->getIntPtrType(*Context))        return 0;      // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) @@ -962,25 +962,28 @@ struct VISIBILITY_HIDDEN MemCpyOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'memmove' Optimizations -struct VISIBILITY_HIDDEN MemMoveOpt : public LibCallOptimization { +struct MemMoveOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { +    // These optimizations require TargetData. +    if (!TD) return 0; +      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||          !isa<PointerType>(FT->getParamType(0)) ||          !isa<PointerType>(FT->getParamType(1)) || -        FT->getParamType(2) != TD->getIntPtrType()) +        FT->getParamType(2) != TD->getIntPtrType(*Context))        return 0;      // memmove(x, y, n) -> llvm.memmove(x, y, n, 1)      Module *M = Caller->getParent();      Intrinsic::ID IID = Intrinsic::memmove;      const Type *Tys[1]; -    Tys[0] = TD->getIntPtrType(); +    Tys[0] = TD->getIntPtrType(*Context);      Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);      Value *Dst = CastToCStr(CI->getOperand(1), B);      Value *Src = CastToCStr(CI->getOperand(2), B);      Value *Size = CI->getOperand(3); -    Value *Align = Context->getConstantInt(Type::Int32Ty, 1); +    Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1);      B.CreateCall4(MemMove, Dst, Src, Size, Align);      return CI->getOperand(1);    } @@ -989,17 +992,21 @@ struct VISIBILITY_HIDDEN MemMoveOpt : public 
LibCallOptimization {  //===---------------------------------------===//  // 'memset' Optimizations -struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization { +struct MemSetOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { +    // These optimizations require TargetData. +    if (!TD) return 0; +      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||          !isa<PointerType>(FT->getParamType(0)) || -        FT->getParamType(1) != TD->getIntPtrType() || -        FT->getParamType(2) != TD->getIntPtrType()) +        !isa<IntegerType>(FT->getParamType(1)) || +        FT->getParamType(2) != TD->getIntPtrType(*Context))        return 0;      // memset(p, v, n) -> llvm.memset(p, v, n, 1) -    Value *Val = B.CreateTrunc(CI->getOperand(2), Type::Int8Ty); +    Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), +				 false);      EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B);      return CI->getOperand(1);    } @@ -1012,7 +1019,7 @@ struct VISIBILITY_HIDDEN MemSetOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'pow*' Optimizations -struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization { +struct PowOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      // Just make sure this has 2 arguments of the same FP type, which match the @@ -1021,40 +1028,44 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {          FT->getParamType(0) != FT->getParamType(1) ||          !FT->getParamType(0)->isFloatingPoint())        return 0; -     +      Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);      if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {        if (Op1C->isExactlyValue(1.0))  // pow(1.0, x) -> 
1.0          return Op1C;        if (Op1C->isExactlyValue(2.0))  // pow(2.0, x) -> exp2(x) -        return EmitUnaryFloatFnCall(Op2, "exp2", B); +        return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes());      } -     +      ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);      if (Op2C == 0) return 0; -     +      if (Op2C->getValueAPF().isZero())  // pow(x, 0.0) -> 1.0 -      return Context->getConstantFP(CI->getType(), 1.0); -     +      return ConstantFP::get(CI->getType(), 1.0); +      if (Op2C->isExactlyValue(0.5)) { -      // FIXME: This is not safe for -0.0 and -inf.  This can only be done when -      // 'unsafe' math optimizations are allowed. -      // x    pow(x, 0.5)  sqrt(x) -      // --------------------------------------------- -      // -0.0    +0.0       -0.0 -      // -inf    +inf       NaN -#if 0 -      // pow(x, 0.5) -> sqrt(x) -      return B.CreateCall(get_sqrt(), Op1, "sqrt"); -#endif +      // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). +      // This is faster than calling pow, and still handles negative zero +      // and negative infinite correctly. +      // TODO: In fast-math mode, this could be just sqrt(x). +      // TODO: In finite-only mode, this could be just fabs(sqrt(x)). 
+      Value *Inf = ConstantFP::getInfinity(CI->getType()); +      Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); +      Value *Sqrt = EmitUnaryFloatFnCall(Op1, "sqrt", B, +                                         Callee->getAttributes()); +      Value *FAbs = EmitUnaryFloatFnCall(Sqrt, "fabs", B, +                                         Callee->getAttributes()); +      Value *FCmp = B.CreateFCmpOEQ(Op1, NegInf, "tmp"); +      Value *Sel = B.CreateSelect(FCmp, Inf, FAbs, "tmp"); +      return Sel;      } -     +      if (Op2C->isExactlyValue(1.0))  // pow(x, 1.0) -> x        return Op1;      if (Op2C->isExactlyValue(2.0))  // pow(x, 2.0) -> x*x        return B.CreateFMul(Op1, Op1, "pow2");      if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x -      return B.CreateFDiv(Context->getConstantFP(CI->getType(), 1.0), +      return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),                            Op1, "powrecip");      return 0;    } @@ -1063,7 +1074,7 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'exp2' Optimizations -struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization { +struct Exp2Opt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      // Just make sure this has 1 argument of FP type, which matches the @@ -1071,35 +1082,38 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {      if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||          !FT->getParamType(0)->isFloatingPoint())        return 0; -     +      Value *Op = CI->getOperand(1);      // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= 32      // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x))  if sizeof(x) < 32      Value *LdExpArg = 0;      if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {        if 
(OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) -        LdExpArg = B.CreateSExt(OpC->getOperand(0), Type::Int32Ty, "tmp"); +        LdExpArg = B.CreateSExt(OpC->getOperand(0), +				Type::getInt32Ty(*Context), "tmp");      } else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {        if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) -        LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp"); +        LdExpArg = B.CreateZExt(OpC->getOperand(0), +				Type::getInt32Ty(*Context), "tmp");      }      if (LdExpArg) {        const char *Name; -      if (Op->getType() == Type::FloatTy) +      if (Op->getType()->isFloatTy())          Name = "ldexpf"; -      else if (Op->getType() == Type::DoubleTy) +      else if (Op->getType()->isDoubleTy())          Name = "ldexp";        else          Name = "ldexpl"; -      Constant *One = Context->getConstantFP(APFloat(1.0f)); -      if (Op->getType() != Type::FloatTy) -        One = Context->getConstantExprFPExtend(One, Op->getType()); +      Constant *One = ConstantFP::get(*Context, APFloat(1.0f)); +      if (!Op->getType()->isFloatTy()) +        One = ConstantExpr::getFPExtend(One, Op->getType());        Module *M = Caller->getParent();        Value *Callee = M->getOrInsertFunction(Name, Op->getType(), -                                             Op->getType(), Type::Int32Ty,NULL); +                                             Op->getType(), +					     Type::getInt32Ty(*Context),NULL);        CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);        if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))          CI->setCallingConv(F->getCallingConv()); @@ -1113,22 +1127,23 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {  //===---------------------------------------===//  // Double -> Float Shrinking Optimizations for Unary Functions like 'floor' -struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization { +struct UnaryDoubleFPOpt : 
public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType(); -    if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy || -        FT->getParamType(0) != Type::DoubleTy) +    if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || +        !FT->getParamType(0)->isDoubleTy())        return 0;      // If this is something like 'floor((double)floatval)', convert to floorf.      FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1)); -    if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy) +    if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())        return 0;      // floor((double)floatval) -> (double)floorf(floatval)      Value *V = Cast->getOperand(0); -    V = EmitUnaryFloatFnCall(V, Callee->getNameStart(), B); -    return B.CreateFPExt(V, Type::DoubleTy); +    V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B, +                             Callee->getAttributes()); +    return B.CreateFPExt(V, Type::getDoubleTy(*Context));    }  }; @@ -1139,54 +1154,56 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'ffs*' Optimizations -struct VISIBILITY_HIDDEN FFSOpt : public LibCallOptimization { +struct FFSOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      // Just make sure this has 2 arguments of the same FP type, which match the      // result type. -    if (FT->getNumParams() != 1 || FT->getReturnType() != Type::Int32Ty || +    if (FT->getNumParams() != 1 || +	FT->getReturnType() != Type::getInt32Ty(*Context) ||          !isa<IntegerType>(FT->getParamType(0)))        return 0; -     +      Value *Op = CI->getOperand(1); -     +      // Constant fold.      
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {        if (CI->getValue() == 0)  // ffs(0) -> 0. -        return Context->getNullValue(CI->getType()); -      return Context->getConstantInt(Type::Int32Ty, // ffs(c) -> cttz(c)+1 +        return Constant::getNullValue(CI->getType()); +      return ConstantInt::get(Type::getInt32Ty(*Context), // ffs(c) -> cttz(c)+1                                CI->getValue().countTrailingZeros()+1);      } -     +      // ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0      const Type *ArgType = Op->getType();      Value *F = Intrinsic::getDeclaration(Callee->getParent(),                                           Intrinsic::cttz, &ArgType, 1);      Value *V = B.CreateCall(F, Op, "cttz"); -    V = B.CreateAdd(V, Context->getConstantInt(V->getType(), 1), "tmp"); -    V = B.CreateIntCast(V, Type::Int32Ty, false, "tmp"); -     -    Value *Cond = B.CreateICmpNE(Op, Context->getNullValue(ArgType), "tmp"); -    return B.CreateSelect(Cond, V, Context->getConstantInt(Type::Int32Ty, 0)); +    V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp"); +    V = B.CreateIntCast(V, Type::getInt32Ty(*Context), false, "tmp"); + +    Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp"); +    return B.CreateSelect(Cond, V, +			  ConstantInt::get(Type::getInt32Ty(*Context), 0));    }  };  //===---------------------------------------===//  // 'isdigit' Optimizations -struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization { +struct IsDigitOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      // We require integer(i32)      if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || -        FT->getParamType(0) != Type::Int32Ty) +        FT->getParamType(0) != Type::getInt32Ty(*Context))        return 0; -     +      // isdigit(c) -> (c-'0') <u 10      Value *Op = CI->getOperand(1); -    
Op = B.CreateSub(Op, Context->getConstantInt(Type::Int32Ty, '0'),  +    Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),                       "isdigittmp"); -    Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 10),  +    Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),                           "isdigit");      return B.CreateZExt(Op, CI->getType());    } @@ -1195,58 +1212,58 @@ struct VISIBILITY_HIDDEN IsDigitOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'isascii' Optimizations -struct VISIBILITY_HIDDEN IsAsciiOpt : public LibCallOptimization { +struct IsAsciiOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      // We require integer(i32)      if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) || -        FT->getParamType(0) != Type::Int32Ty) +        FT->getParamType(0) != Type::getInt32Ty(*Context))        return 0; -     +      // isascii(c) -> c <u 128      Value *Op = CI->getOperand(1); -    Op = B.CreateICmpULT(Op, Context->getConstantInt(Type::Int32Ty, 128), +    Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),                           "isascii");      return B.CreateZExt(Op, CI->getType());    }  }; -   +  //===---------------------------------------===//  // 'abs', 'labs', 'llabs' Optimizations -struct VISIBILITY_HIDDEN AbsOpt : public LibCallOptimization { +struct AbsOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      // We require integer(integer) where the types agree.      if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||          FT->getParamType(0) != FT->getReturnType())        return 0; -     +      // abs(x) -> x >s -1 ? 
x : -x      Value *Op = CI->getOperand(1); -    Value *Pos = B.CreateICmpSGT(Op,  -                             Context->getConstantIntAllOnesValue(Op->getType()), +    Value *Pos = B.CreateICmpSGT(Op, +                             Constant::getAllOnesValue(Op->getType()),                                   "ispos");      Value *Neg = B.CreateNeg(Op, "neg");      return B.CreateSelect(Pos, Op, Neg);    }  }; -   +  //===---------------------------------------===//  // 'toascii' Optimizations -struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization { +struct ToAsciiOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      const FunctionType *FT = Callee->getFunctionType();      // We require i32(i32)      if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || -        FT->getParamType(0) != Type::Int32Ty) +        FT->getParamType(0) != Type::getInt32Ty(*Context))        return 0; -     +      // isascii(c) -> c & 0x7f      return B.CreateAnd(CI->getOperand(1), -                       Context->getConstantInt(CI->getType(),0x7F)); +                       ConstantInt::get(CI->getType(),0x7F));    }  }; @@ -1257,15 +1274,15 @@ struct VISIBILITY_HIDDEN ToAsciiOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'printf' Optimizations -struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization { +struct PrintFOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      // Require one fixed pointer argument and an integer/void result.      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) ||          !(isa<IntegerType>(FT->getReturnType()) || -          FT->getReturnType() == Type::VoidTy)) +          FT->getReturnType()->isVoidTy()))        return 0; -     +      // Check for a fixed format string.      
std::string FormatStr;      if (!GetConstantStringInfo(CI->getOperand(1), FormatStr)) @@ -1273,39 +1290,39 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {      // Empty format string -> noop.      if (FormatStr.empty())  // Tolerate printf's declared void. -      return CI->use_empty() ? (Value*)CI :  -                               Context->getConstantInt(CI->getType(), 0); -     +      return CI->use_empty() ? (Value*)CI : +                               ConstantInt::get(CI->getType(), 0); +      // printf("x") -> putchar('x'), even for '%'.      if (FormatStr.size() == 1) { -      EmitPutChar(Context->getConstantInt(Type::Int32Ty, FormatStr[0]), B); -      return CI->use_empty() ? (Value*)CI :  -                               Context->getConstantInt(CI->getType(), 1); +      EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B); +      return CI->use_empty() ? (Value*)CI : +                               ConstantInt::get(CI->getType(), 1);      } -     +      // printf("foo\n") --> puts("foo")      if (FormatStr[FormatStr.size()-1] == '\n' &&          FormatStr.find('%') == std::string::npos) {  // no format characters.        // Create a string literal with no \n on it.  We expect the constant merge        // pass to be run after this pass, to merge duplicate strings.        FormatStr.erase(FormatStr.end()-1); -      Constant *C = Context->getConstantArray(FormatStr, true); -      C = new GlobalVariable(C->getType(), true,GlobalVariable::InternalLinkage, -                             C, "str", Callee->getParent()); +      Constant *C = ConstantArray::get(*Context, FormatStr, true); +      C = new GlobalVariable(*Callee->getParent(), C->getType(), true, +                             GlobalVariable::InternalLinkage, C, "str");        EmitPutS(C, B); -      return CI->use_empty() ? (Value*)CI :  -                    Context->getConstantInt(CI->getType(), FormatStr.size()+1); +      return CI->use_empty() ? 
(Value*)CI : +                    ConstantInt::get(CI->getType(), FormatStr.size()+1);      } -     +      // Optimize specific format strings.      // printf("%c", chr) --> putchar(*(i8*)dst)      if (FormatStr == "%c" && CI->getNumOperands() > 2 &&          isa<IntegerType>(CI->getOperand(2)->getType())) {        EmitPutChar(CI->getOperand(2), B); -      return CI->use_empty() ? (Value*)CI :  -                               Context->getConstantInt(CI->getType(), 1); +      return CI->use_empty() ? (Value*)CI : +                               ConstantInt::get(CI->getType(), 1);      } -     +      // printf("%s\n", str) --> puts(str)      if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&          isa<PointerType>(CI->getOperand(2)->getType()) && @@ -1320,7 +1337,7 @@ struct VISIBILITY_HIDDEN PrintFOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'sprintf' Optimizations -struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization { +struct SPrintFOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      // Require two fixed pointer arguments and an integer result.      const FunctionType *FT = Callee->getFunctionType(); @@ -1333,7 +1350,7 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {      std::string FormatStr;      if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))        return 0; -     +      // If we just have a format string (nothing else crazy) transform it.      if (CI->getNumOperands() == 3) {        // Make sure there's no % in the constant array.  We could try to handle @@ -1341,41 +1358,49 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {        for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)          if (FormatStr[i] == '%')            return 0; // we found a format specifier, bail out. -       + +      // These optimizations require TargetData. 
+      if (!TD) return 0; +        // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)        EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. -          Context->getConstantInt(TD->getIntPtrType(), FormatStr.size()+1),1,B); -      return Context->getConstantInt(CI->getType(), FormatStr.size()); +          ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1),1,B); +      return ConstantInt::get(CI->getType(), FormatStr.size());      } -     +      // The remaining optimizations require the format string to be "%s" or "%c"      // and have an extra operand.      if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)        return 0; -     +      // Decode the second character of the format string.      if (FormatStr[1] == 'c') {        // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0        if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0; -      Value *V = B.CreateTrunc(CI->getOperand(3), Type::Int8Ty, "char"); +      Value *V = B.CreateTrunc(CI->getOperand(3), +			       Type::getInt8Ty(*Context), "char");        Value *Ptr = CastToCStr(CI->getOperand(1), B);        B.CreateStore(V, Ptr); -      Ptr = B.CreateGEP(Ptr, Context->getConstantInt(Type::Int32Ty, 1), "nul"); -      B.CreateStore(Context->getNullValue(Type::Int8Ty), Ptr); -       -      return Context->getConstantInt(CI->getType(), 1); +      Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), +			"nul"); +      B.CreateStore(Constant::getNullValue(Type::getInt8Ty(*Context)), Ptr); + +      return ConstantInt::get(CI->getType(), 1);      } -     +      if (FormatStr[1] == 's') { +      // These optimizations require TargetData. 
+      if (!TD) return 0; +        // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)        if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0;        Value *Len = EmitStrLen(CI->getOperand(3), B);        Value *IncLen = B.CreateAdd(Len, -                                  Context->getConstantInt(Len->getType(), 1), +                                  ConstantInt::get(Len->getType(), 1),                                    "leninc");        EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, B); -       +        // The sprintf result is the unincremented number of bytes in the string.        return B.CreateIntCast(Len, CI->getType(), false);      } @@ -1386,7 +1411,7 @@ struct VISIBILITY_HIDDEN SPrintFOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'fwrite' Optimizations -struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization { +struct FWriteOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      // Require a pointer, an integer, an integer, a pointer, returning integer.      const FunctionType *FT = Callee->getFunctionType(); @@ -1396,22 +1421,22 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {          !isa<PointerType>(FT->getParamType(3)) ||          !isa<IntegerType>(FT->getReturnType()))        return 0; -     +      // Get the element size and count.      ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));      ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));      if (!SizeC || !CountC) return 0;      uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); -     +      // If this is writing zero records, remove the call (it's a noop).      if (Bytes == 0) -      return Context->getConstantInt(CI->getType(), 0); -     +      return ConstantInt::get(CI->getType(), 0); +      // If this is writing one byte, turn it into fputc.      
if (Bytes == 1) {  // fwrite(S,1,1,F) -> fputc(S[0],F)        Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");        EmitFPutC(Char, CI->getOperand(4), B); -      return Context->getConstantInt(CI->getType(), 1); +      return ConstantInt::get(CI->getType(), 1);      }      return 0; @@ -1421,20 +1446,23 @@ struct VISIBILITY_HIDDEN FWriteOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'fputs' Optimizations -struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization { +struct FPutsOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { +    // These optimizations require TargetData. +    if (!TD) return 0; +      // Require two pointers.  Also, we can't optimize if return value is used.      const FunctionType *FT = Callee->getFunctionType();      if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||          !isa<PointerType>(FT->getParamType(1)) ||          !CI->use_empty())        return 0; -     +      // fputs(s,F) --> fwrite(s,1,strlen(s),F)      uint64_t Len = GetStringLength(CI->getOperand(1));      if (!Len) return 0;      EmitFWrite(CI->getOperand(1), -               Context->getConstantInt(TD->getIntPtrType(), Len-1), +               ConstantInt::get(TD->getIntPtrType(*Context), Len-1),                 CI->getOperand(2), B);      return CI;  // Known to have no uses (see above).    } @@ -1443,7 +1471,7 @@ struct VISIBILITY_HIDDEN FPutsOpt : public LibCallOptimization {  //===---------------------------------------===//  // 'fprintf' Optimizations -struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization { +struct FPrintFOpt : public LibCallOptimization {    virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {      // Require two fixed paramters as pointers and integer result.      
const FunctionType *FT = Callee->getFunctionType(); @@ -1451,7 +1479,7 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {          !isa<PointerType>(FT->getParamType(1)) ||          !isa<IntegerType>(FT->getReturnType()))        return 0; -     +      // All the optimizations depend on the format string.      std::string FormatStr;      if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) @@ -1462,26 +1490,29 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {        for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)          if (FormatStr[i] == '%')  // Could handle %% -> % if we cared.            return 0; // We found a format specifier. -       -      EmitFWrite(CI->getOperand(2), Context->getConstantInt(TD->getIntPtrType(), + +      // These optimizations require TargetData. +      if (!TD) return 0; + +      EmitFWrite(CI->getOperand(2), ConstantInt::get(TD->getIntPtrType(*Context),                                                       FormatStr.size()),                   CI->getOperand(1), B); -      return Context->getConstantInt(CI->getType(), FormatStr.size()); +      return ConstantInt::get(CI->getType(), FormatStr.size());      } -     +      // The remaining optimizations require the format string to be "%s" or "%c"      // and have an extra operand.      if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)        return 0; -     +      // Decode the second character of the format string.      
if (FormatStr[1] == 'c') {        // fprintf(F, "%c", chr) --> *(i8*)dst = chr        if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;        EmitFPutC(CI->getOperand(3), CI->getOperand(1), B); -      return Context->getConstantInt(CI->getType(), 1); +      return ConstantInt::get(CI->getType(), 1);      } -     +      if (FormatStr[1] == 's') {        // fprintf(F, "%s", str) -> fputs(str, F)        if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty()) @@ -1502,10 +1533,8 @@ struct VISIBILITY_HIDDEN FPrintFOpt : public LibCallOptimization {  namespace {    /// This pass optimizes well known library functions from libc and libm.    /// -  class VISIBILITY_HIDDEN SimplifyLibCalls : public FunctionPass { +  class SimplifyLibCalls : public FunctionPass {      StringMap<LibCallOptimization*> Optimizations; -    // Miscellaneous LibCall Optimizations -    ExitOpt Exit;       // String and Memory LibCall Optimizations      StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;      StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen; @@ -1536,7 +1565,6 @@ namespace {      bool doInitialization(Module &M);      virtual void getAnalysisUsage(AnalysisUsage &AU) const { -      AU.addRequired<TargetData>();      }    };    char SimplifyLibCalls::ID = 0; @@ -1547,15 +1575,12 @@ X("simplify-libcalls", "Simplify well-known library calls");  // Public interface to the Simplify LibCalls pass.  FunctionPass *llvm::createSimplifyLibCallsPass() { -  return new SimplifyLibCalls();  +  return new SimplifyLibCalls();  }  /// Optimizations - Populate the Optimizations map with all the optimizations  /// we know.  
void SimplifyLibCalls::InitOptimizations() { -  // Miscellaneous LibCall Optimizations -  Optimizations["exit"] = &Exit; -      // String and Memory LibCall Optimizations    Optimizations["strcat"] = &StrCat;    Optimizations["strncat"] = &StrNCat; @@ -1576,7 +1601,7 @@ void SimplifyLibCalls::InitOptimizations() {    Optimizations["memcpy"] = &MemCpy;    Optimizations["memmove"] = &MemMove;    Optimizations["memset"] = &MemSet; -   +    // Math Library Optimizations    Optimizations["powf"] = &Pow;    Optimizations["pow"] = &Pow; @@ -1594,7 +1619,7 @@ void SimplifyLibCalls::InitOptimizations() {    Optimizations["llvm.exp2.f80"] = &Exp2;    Optimizations["llvm.exp2.f64"] = &Exp2;    Optimizations["llvm.exp2.f32"] = &Exp2; -   +  #ifdef HAVE_FLOORF    Optimizations["floor"] = &UnaryDoubleFP;  #endif @@ -1610,7 +1635,7 @@ void SimplifyLibCalls::InitOptimizations() {  #ifdef HAVE_NEARBYINTF    Optimizations["nearbyint"] = &UnaryDoubleFP;  #endif -   +    // Integer Optimizations    Optimizations["ffs"] = &FFS;    Optimizations["ffsl"] = &FFS; @@ -1621,7 +1646,7 @@ void SimplifyLibCalls::InitOptimizations() {    Optimizations["isdigit"] = &IsDigit;    Optimizations["isascii"] = &IsAscii;    Optimizations["toascii"] = &ToAscii; -   +    // Formatting and IO Optimizations    Optimizations["sprintf"] = &SPrintF;    Optimizations["printf"] = &PrintF; @@ -1636,10 +1661,10 @@ void SimplifyLibCalls::InitOptimizations() {  bool SimplifyLibCalls::runOnFunction(Function &F) {    if (Optimizations.empty())      InitOptimizations(); -   -  const TargetData &TD = getAnalysis<TargetData>(); -   -  IRBuilder<> Builder; + +  const TargetData *TD = getAnalysisIfAvailable<TargetData>(); + +  IRBuilder<> Builder(F.getContext());    bool Changed = false;    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { @@ -1647,37 +1672,35 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {        // Ignore non-calls.        
CallInst *CI = dyn_cast<CallInst>(I++);        if (!CI) continue; -       +        // Ignore indirect calls and calls to non-external functions.        Function *Callee = CI->getCalledFunction();        if (Callee == 0 || !Callee->isDeclaration() ||            !(Callee->hasExternalLinkage() || Callee->hasDLLImportLinkage()))          continue; -       +        // Ignore unknown calls. -      const char *CalleeName = Callee->getNameStart(); -      StringMap<LibCallOptimization*>::iterator OMI = -        Optimizations.find(CalleeName, CalleeName+Callee->getNameLen()); -      if (OMI == Optimizations.end()) continue; -       +      LibCallOptimization *LCO = Optimizations.lookup(Callee->getName()); +      if (!LCO) continue; +        // Set the builder to the instruction after the call.        Builder.SetInsertPoint(BB, I); -       +        // Try to optimize this call. -      Value *Result = OMI->second->OptimizeCall(CI, TD, Builder); +      Value *Result = LCO->OptimizeCall(CI, TD, Builder);        if (Result == 0) continue; -      DEBUG(DOUT << "SimplifyLibCalls simplified: " << *CI; -            DOUT << "  into: " << *Result << "\n"); -       +      DEBUG(errs() << "SimplifyLibCalls simplified: " << *CI; +            errs() << "  into: " << *Result << "\n"); +        // Something changed!        Changed = true;        ++NumSimplified; -       +        // Inspect the instruction after the call (which was potentially just        // added) next.        
I = CI; ++I; -       +        if (CI != Result && !CI->use_empty()) {          CI->replaceAllUsesWith(Result);          if (!Result->hasName()) @@ -1736,40 +1759,39 @@ bool SimplifyLibCalls::doInitialization(Module &M) {      if (!F.isDeclaration())        continue; -    unsigned NameLen = F.getNameLen(); -    if (!NameLen) +    if (!F.hasName())        continue;      const FunctionType *FTy = F.getFunctionType(); -    const char *NameStr = F.getNameStart(); -    switch (NameStr[0]) { +    StringRef Name = F.getName(); +    switch (Name[0]) {        case 's': -        if (NameLen == 6 && !strcmp(NameStr, "strlen")) { +        if (Name == "strlen") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setOnlyReadsMemory(F);            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if ((NameLen == 6 && !strcmp(NameStr, "strcpy")) || -                   (NameLen == 6 && !strcmp(NameStr, "stpcpy")) || -                   (NameLen == 6 && !strcmp(NameStr, "strcat")) || -                   (NameLen == 6 && !strcmp(NameStr, "strtol")) || -                   (NameLen == 6 && !strcmp(NameStr, "strtod")) || -                   (NameLen == 6 && !strcmp(NameStr, "strtof")) || -                   (NameLen == 7 && !strcmp(NameStr, "strtoul")) || -                   (NameLen == 7 && !strcmp(NameStr, "strtoll")) || -                   (NameLen == 7 && !strcmp(NameStr, "strtold")) || -                   (NameLen == 7 && !strcmp(NameStr, "strncat")) || -                   (NameLen == 7 && !strcmp(NameStr, "strncpy")) || -                   (NameLen == 8 && !strcmp(NameStr, "strtoull"))) { +        } else if (Name == "strcpy" || +                   Name == "stpcpy" || +                   Name == "strcat" || +                   Name == "strtol" || +                   Name == "strtod" || +                   Name == "strtof" || +                   Name == "strtoul" || +              
     Name == "strtoll" ||
+                   Name == "strtold" ||
+                   Name == "strncat" ||
+                   Name == "strncpy" ||
+                   Name == "strtoull") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(1)))
             continue;
           setDoesNotThrow(F);
           setDoesNotCapture(F, 2);
-        } else if (NameLen == 7 && !strcmp(NameStr, "strxfrm")) {
+        } else if (Name == "strxfrm") {
           if (FTy->getNumParams() != 3 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -1777,13 +1799,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 6 && !strcmp(NameStr, "strcmp")) ||
-                   (NameLen == 6 && !strcmp(NameStr, "strspn")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strncmp")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strcspn")) ||
-                   (NameLen == 7 && !strcmp(NameStr, "strcoll")) ||
-                   (NameLen == 10 && !strcmp(NameStr, "strcasecmp")) ||
-                   (NameLen == 11 && !strcmp(NameStr, "strncasecmp"))) {
+        } else if (Name == "strcmp" ||
+                   Name == "strspn" ||
+                   Name == "strncmp" ||
+                   Name == "strcspn" ||
+                   Name == "strcoll" ||
+                   Name == "strcasecmp" ||
+                   Name == "strncasecmp") {
           if (FTy->getNumParams() < 2 ||
               !isa<PointerType>(FTy->getParamType(0)) ||
               !isa<PointerType>(FTy->getParamType(1)))
@@ -1792,31 +1814,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
           setDoesNotThrow(F);
           setDoesNotCapture(F, 1);
           setDoesNotCapture(F, 2);
-        } else if ((NameLen == 6 && !strcmp(NameStr, 
"strstr")) || -                   (NameLen == 7 && !strcmp(NameStr, "strpbrk"))) { +        } else if (Name == "strstr" || +                   Name == "strpbrk") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            setOnlyReadsMemory(F);            setDoesNotThrow(F);            setDoesNotCapture(F, 2); -        } else if ((NameLen == 6 && !strcmp(NameStr, "strtok")) || -                   (NameLen == 8 && !strcmp(NameStr, "strtok_r"))) { +        } else if (Name == "strtok" || +                   Name == "strtok_r") {            if (FTy->getNumParams() < 2 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 2); -        } else if ((NameLen == 5 && !strcmp(NameStr, "scanf")) || -                   (NameLen == 6 && !strcmp(NameStr, "setbuf")) || -                   (NameLen == 7 && !strcmp(NameStr, "setvbuf"))) { +        } else if (Name == "scanf" || +                   Name == "setbuf" || +                   Name == "setvbuf") {            if (FTy->getNumParams() < 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if ((NameLen == 6 && !strcmp(NameStr, "strdup")) || -                   (NameLen == 7 && !strcmp(NameStr, "strndup"))) { +        } else if (Name == "strdup" || +                   Name == "strndup") {            if (FTy->getNumParams() < 1 ||                !isa<PointerType>(FTy->getReturnType()) ||                !isa<PointerType>(FTy->getParamType(0))) @@ -1824,10 +1846,10 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotAlias(F, 0);            setDoesNotCapture(F, 1); -        } else if ((NameLen == 4 && !strcmp(NameStr, "stat")) || -                   (NameLen == 6 && !strcmp(NameStr, "sscanf")) || -      
             (NameLen == 7 && !strcmp(NameStr, "sprintf")) || -                   (NameLen == 7 && !strcmp(NameStr, "statvfs"))) { +        } else if (Name == "stat" || +                   Name == "sscanf" || +                   Name == "sprintf" || +                   Name == "statvfs") {            if (FTy->getNumParams() < 2 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -1835,7 +1857,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 2); -        } else if (NameLen == 8 && !strcmp(NameStr, "snprintf")) { +        } else if (Name == "snprintf") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(2))) @@ -1843,7 +1865,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 3); -        } else if (NameLen == 9 && !strcmp(NameStr, "setitimer")) { +        } else if (Name == "setitimer") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(1)) ||                !isa<PointerType>(FTy->getParamType(2))) @@ -1851,7 +1873,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotCapture(F, 2);            setDoesNotCapture(F, 3); -        } else if (NameLen == 6 && !strcmp(NameStr, "system")) { +        } else if (Name == "system") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue; @@ -1860,7 +1882,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'm': -        if (NameLen == 6 && !strcmp(NameStr, "memcmp")) { +        if (Name == "malloc") { +          if (FTy->getNumParams() != 1 || 
+              !isa<PointerType>(FTy->getReturnType())) +            continue; +          setDoesNotThrow(F); +          setDoesNotAlias(F, 0); +        } else if (Name == "memcmp") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -1869,29 +1897,29 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 2); -        } else if ((NameLen == 6 && !strcmp(NameStr, "memchr")) || -                   (NameLen == 7 && !strcmp(NameStr, "memrchr"))) { +        } else if (Name == "memchr" || +                   Name == "memrchr") {            if (FTy->getNumParams() != 3)              continue;            setOnlyReadsMemory(F);            setDoesNotThrow(F); -        } else if ((NameLen == 4 && !strcmp(NameStr, "modf")) || -                   (NameLen == 5 && !strcmp(NameStr, "modff")) || -                   (NameLen == 5 && !strcmp(NameStr, "modfl")) || -                   (NameLen == 6 && !strcmp(NameStr, "memcpy")) || -                   (NameLen == 7 && !strcmp(NameStr, "memccpy")) || -                   (NameLen == 7 && !strcmp(NameStr, "memmove"))) { +        } else if (Name == "modf" || +                   Name == "modff" || +                   Name == "modfl" || +                   Name == "memcpy" || +                   Name == "memccpy" || +                   Name == "memmove") {            if (FTy->getNumParams() < 2 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 2); -        } else if (NameLen == 8 && !strcmp(NameStr, "memalign")) { +        } else if (Name == "memalign") {            if (!isa<PointerType>(FTy->getReturnType()))              continue;            setDoesNotAlias(F, 0); -        } else if ((NameLen == 5 && !strcmp(NameStr, "mkdir")) || -         
          (NameLen == 6 && !strcmp(NameStr, "mktime"))) { +        } else if (Name == "mkdir" || +                   Name == "mktime") {            if (FTy->getNumParams() == 0 ||                !isa<PointerType>(FTy->getParamType(0)))              continue; @@ -1900,7 +1928,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'r': -        if (NameLen == 7 && !strcmp(NameStr, "realloc")) { +        if (Name == "realloc") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getReturnType())) @@ -1908,23 +1936,23 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotAlias(F, 0);            setDoesNotCapture(F, 1); -        } else if (NameLen == 4 && !strcmp(NameStr, "read")) { +        } else if (Name == "read") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            // May throw; "read" is a valid pthread cancellation point.            
setDoesNotCapture(F, 2); -        } else if ((NameLen == 5 && !strcmp(NameStr, "rmdir")) || -                   (NameLen == 6 && !strcmp(NameStr, "rewind")) || -                   (NameLen == 6 && !strcmp(NameStr, "remove")) || -                   (NameLen == 8 && !strcmp(NameStr, "realpath"))) { +        } else if (Name == "rmdir" || +                   Name == "rewind" || +                   Name == "remove" || +                   Name == "realpath") {            if (FTy->getNumParams() < 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if ((NameLen == 6 && !strcmp(NameStr, "rename")) || -                   (NameLen == 8 && !strcmp(NameStr, "readlink"))) { +        } else if (Name == "rename" || +                   Name == "readlink") {            if (FTy->getNumParams() < 2 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -1935,7 +1963,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'w': -        if (NameLen == 5 && !strcmp(NameStr, "write")) { +        if (Name == "write") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(1)))              continue; @@ -1944,7 +1972,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'b': -        if (NameLen == 5 && !strcmp(NameStr, "bcopy")) { +        if (Name == "bcopy") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -1952,7 +1980,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 2); -        } else if (NameLen == 4 && !strcmp(NameStr, "bcmp")) { +        } else if (Name == "bcmp") { 
           if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -1961,7 +1989,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setOnlyReadsMemory(F);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 2); -        } else if (NameLen == 5 && !strcmp(NameStr, "bzero")) { +        } else if (Name == "bzero") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(0)))              continue; @@ -1970,17 +1998,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'c': -        if (NameLen == 6 && !strcmp(NameStr, "calloc")) { +        if (Name == "calloc") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getReturnType()))              continue;            setDoesNotThrow(F);            setDoesNotAlias(F, 0); -        } else if ((NameLen == 5 && !strcmp(NameStr, "chmod")) || -                   (NameLen == 5 && !strcmp(NameStr, "chown")) || -                   (NameLen == 7 && !strcmp(NameStr, "ctermid")) || -                   (NameLen == 8 && !strcmp(NameStr, "clearerr")) || -                   (NameLen == 8 && !strcmp(NameStr, "closedir"))) { +        } else if (Name == "chmod" || +                   Name == "chown" || +                   Name == "ctermid" || +                   Name == "clearerr" || +                   Name == "closedir") {            if (FTy->getNumParams() == 0 ||                !isa<PointerType>(FTy->getParamType(0)))              continue; @@ -1989,17 +2017,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'a': -        if ((NameLen == 4 && !strcmp(NameStr, "atoi")) || -            (NameLen == 4 && !strcmp(NameStr, "atol")) || -            (NameLen == 4 && !strcmp(NameStr, "atof")) || -            (NameLen == 5 && !strcmp(NameStr, "atoll"))) { +        
if (Name == "atoi" || +            Name == "atol" || +            Name == "atof" || +            Name == "atoll") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setOnlyReadsMemory(F);            setDoesNotCapture(F, 1); -        } else if (NameLen == 6 && !strcmp(NameStr, "access")) { +        } else if (Name == "access") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(0)))              continue; @@ -2008,7 +2036,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'f': -        if (NameLen == 5 && !strcmp(NameStr, "fopen")) { +        if (Name == "fopen") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getReturnType()) ||                !isa<PointerType>(FTy->getParamType(0)) || @@ -2018,7 +2046,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotAlias(F, 0);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 2); -        } else if (NameLen == 6 && !strcmp(NameStr, "fdopen")) { +        } else if (Name == "fdopen") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getReturnType()) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -2026,52 +2054,52 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotAlias(F, 0);            setDoesNotCapture(F, 2); -        } else if ((NameLen == 4 && !strcmp(NameStr, "feof")) || -                   (NameLen == 4 && !strcmp(NameStr, "free")) || -                   (NameLen == 5 && !strcmp(NameStr, "fseek")) || -                   (NameLen == 5 && !strcmp(NameStr, "ftell")) || -                   (NameLen == 5 && !strcmp(NameStr, "fgetc")) || -                   (NameLen == 6 && !strcmp(NameStr, "fseeko")) || -                   (NameLen == 6 && 
!strcmp(NameStr, "ftello")) || -                   (NameLen == 6 && !strcmp(NameStr, "fileno")) || -                   (NameLen == 6 && !strcmp(NameStr, "fflush")) || -                   (NameLen == 6 && !strcmp(NameStr, "fclose")) || -                   (NameLen == 7 && !strcmp(NameStr, "fsetpos")) || -                   (NameLen == 9 && !strcmp(NameStr, "flockfile")) || -                   (NameLen == 11 && !strcmp(NameStr, "funlockfile")) || -                   (NameLen == 12 && !strcmp(NameStr, "ftrylockfile"))) { +        } else if (Name == "feof" || +                   Name == "free" || +                   Name == "fseek" || +                   Name == "ftell" || +                   Name == "fgetc" || +                   Name == "fseeko" || +                   Name == "ftello" || +                   Name == "fileno" || +                   Name == "fflush" || +                   Name == "fclose" || +                   Name == "fsetpos" || +                   Name == "flockfile" || +                   Name == "funlockfile" || +                   Name == "ftrylockfile") {            if (FTy->getNumParams() == 0 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if (NameLen == 6 && !strcmp(NameStr, "ferror")) { +        } else if (Name == "ferror") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1);            setOnlyReadsMemory(F); -        } else if ((NameLen == 5 && !strcmp(NameStr, "fputc")) || -                   (NameLen == 5 && !strcmp(NameStr, "fstat")) || -                   (NameLen == 5 && !strcmp(NameStr, "frexp")) || -                   (NameLen == 6 && !strcmp(NameStr, "frexpf")) || -                   (NameLen == 6 && !strcmp(NameStr, "frexpl")) || -                   (NameLen == 8 && 
!strcmp(NameStr, "fstatvfs"))) { +        } else if (Name == "fputc" || +                   Name == "fstat" || +                   Name == "frexp" || +                   Name == "frexpf" || +                   Name == "frexpl" || +                   Name == "fstatvfs") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 2); -        } else if (NameLen == 5 && !strcmp(NameStr, "fgets")) { +        } else if (Name == "fgets") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(2)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 3); -        } else if ((NameLen == 5 && !strcmp(NameStr, "fread")) || -                   (NameLen == 6 && !strcmp(NameStr, "fwrite"))) { +        } else if (Name == "fread" || +                   Name == "fwrite") {            if (FTy->getNumParams() != 4 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(3))) @@ -2079,10 +2107,10 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 4); -        } else if ((NameLen == 5 && !strcmp(NameStr, "fputs")) || -                   (NameLen == 6 && !strcmp(NameStr, "fscanf")) || -                   (NameLen == 7 && !strcmp(NameStr, "fprintf")) || -                   (NameLen == 7 && !strcmp(NameStr, "fgetpos"))) { +        } else if (Name == "fputs" || +                   Name == "fscanf" || +                   Name == "fprintf" || +                   Name == "fgetpos") {            if (FTy->getNumParams() < 2 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -2093,31 +2121,31 @@ bool 
SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'g': -        if ((NameLen == 4 && !strcmp(NameStr, "getc")) || -            (NameLen == 10 && !strcmp(NameStr, "getlogin_r")) || -            (NameLen == 13 && !strcmp(NameStr, "getc_unlocked"))) { +        if (Name == "getc" || +            Name == "getlogin_r" || +            Name == "getc_unlocked") {            if (FTy->getNumParams() == 0 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if (NameLen == 6 && !strcmp(NameStr, "getenv")) { +        } else if (Name == "getenv") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setOnlyReadsMemory(F);            setDoesNotCapture(F, 1); -        } else if ((NameLen == 4 && !strcmp(NameStr, "gets")) || -                   (NameLen == 7 && !strcmp(NameStr, "getchar"))) { +        } else if (Name == "gets" || +                   Name == "getchar") {            setDoesNotThrow(F); -        } else if (NameLen == 9 && !strcmp(NameStr, "getitimer")) { +        } else if (Name == "getitimer") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 2); -        } else if (NameLen == 8 && !strcmp(NameStr, "getpwnam")) { +        } else if (Name == "getpwnam") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue; @@ -2126,22 +2154,22 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'u': -        if (NameLen == 6 && !strcmp(NameStr, "ungetc")) { +        if (Name == "ungetc") {            if (FTy->getNumParams() != 2 ||                
!isa<PointerType>(FTy->getParamType(1)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 2); -        } else if ((NameLen == 5 && !strcmp(NameStr, "uname")) || -                   (NameLen == 6 && !strcmp(NameStr, "unlink")) || -                   (NameLen == 8 && !strcmp(NameStr, "unsetenv"))) { +        } else if (Name == "uname" || +                   Name == "unlink" || +                   Name == "unsetenv") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if ((NameLen == 5 && !strcmp(NameStr, "utime")) || -                   (NameLen == 6 && !strcmp(NameStr, "utimes"))) { +        } else if (Name == "utime" || +                   Name == "utimes") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -2152,30 +2180,30 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'p': -        if (NameLen == 4 && !strcmp(NameStr, "putc")) { +        if (Name == "putc") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 2); -        } else if ((NameLen == 4 && !strcmp(NameStr, "puts")) || -                   (NameLen == 6 && !strcmp(NameStr, "printf")) || -                   (NameLen == 6 && !strcmp(NameStr, "perror"))) { +        } else if (Name == "puts" || +                   Name == "printf" || +                   Name == "perror") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if ((NameLen == 5 && !strcmp(NameStr, "pread")) || -        
           (NameLen == 6 && !strcmp(NameStr, "pwrite"))) { +        } else if (Name == "pread" || +                   Name == "pwrite") {            if (FTy->getNumParams() != 4 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            // May throw; these are valid pthread cancellation points.            setDoesNotCapture(F, 2); -        } else if (NameLen == 7 && !strcmp(NameStr, "putchar")) { +        } else if (Name == "putchar") {            setDoesNotThrow(F); -        } else if (NameLen == 5 && !strcmp(NameStr, "popen")) { +        } else if (Name == "popen") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getReturnType()) ||                !isa<PointerType>(FTy->getParamType(0)) || @@ -2185,7 +2213,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotAlias(F, 0);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 2); -        } else if (NameLen == 6 && !strcmp(NameStr, "pclose")) { +        } else if (Name == "pclose") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue; @@ -2194,14 +2222,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'v': -        if (NameLen == 6 && !strcmp(NameStr, "vscanf")) { +        if (Name == "vscanf") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if ((NameLen == 7 && !strcmp(NameStr, "vsscanf")) || -                   (NameLen == 7 && !strcmp(NameStr, "vfscanf"))) { +        } else if (Name == "vsscanf" || +                   Name == "vfscanf") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(1)) ||                !isa<PointerType>(FTy->getParamType(2))) @@ -2209,19 +2237,19 @@ bool 
SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 2); -        } else if (NameLen == 6 && !strcmp(NameStr, "valloc")) { +        } else if (Name == "valloc") {            if (!isa<PointerType>(FTy->getReturnType()))              continue;            setDoesNotThrow(F);            setDoesNotAlias(F, 0); -        } else if (NameLen == 7 && !strcmp(NameStr, "vprintf")) { +        } else if (Name == "vprintf") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if ((NameLen == 8 && !strcmp(NameStr, "vfprintf")) || -                   (NameLen == 8 && !strcmp(NameStr, "vsprintf"))) { +        } else if (Name == "vfprintf" || +                   Name == "vsprintf") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -2229,7 +2257,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 2); -        } else if (NameLen == 9 && !strcmp(NameStr, "vsnprintf")) { +        } else if (Name == "vsnprintf") {            if (FTy->getNumParams() != 4 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(2))) @@ -2240,13 +2268,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'o': -        if (NameLen == 4 && !strcmp(NameStr, "open")) { +        if (Name == "open") {            if (FTy->getNumParams() < 2 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            // May throw; "open" is a valid pthread cancellation point.            
setDoesNotCapture(F, 1); -        } else if (NameLen == 7 && !strcmp(NameStr, "opendir")) { +        } else if (Name == "opendir") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getReturnType()) ||                !isa<PointerType>(FTy->getParamType(0))) @@ -2257,12 +2285,12 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 't': -        if (NameLen == 7 && !strcmp(NameStr, "tmpfile")) { +        if (Name == "tmpfile") {            if (!isa<PointerType>(FTy->getReturnType()))              continue;            setDoesNotThrow(F);            setDoesNotAlias(F, 0); -        } else if (NameLen == 5 && !strcmp(NameStr, "times")) { +        } else if (Name == "times") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue; @@ -2271,21 +2299,21 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'h': -        if ((NameLen == 5 && !strcmp(NameStr, "htonl")) || -            (NameLen == 5 && !strcmp(NameStr, "htons"))) { +        if (Name == "htonl" || +            Name == "htons") {            setDoesNotThrow(F);            setDoesNotAccessMemory(F);          }          break;        case 'n': -        if ((NameLen == 5 && !strcmp(NameStr, "ntohl")) || -            (NameLen == 5 && !strcmp(NameStr, "ntohs"))) { +        if (Name == "ntohl" || +            Name == "ntohs") {            setDoesNotThrow(F);            setDoesNotAccessMemory(F);          }          break;        case 'l': -        if (NameLen == 5 && !strcmp(NameStr, "lstat")) { +        if (Name == "lstat") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -2293,7 +2321,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotCapture(F, 1);            
setDoesNotCapture(F, 2); -        } else if (NameLen == 6 && !strcmp(NameStr, "lchown")) { +        } else if (Name == "lchown") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(0)))              continue; @@ -2302,7 +2330,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 'q': -        if (NameLen == 5 && !strcmp(NameStr, "qsort")) { +        if (Name == "qsort") {            if (FTy->getNumParams() != 4 ||                !isa<PointerType>(FTy->getParamType(3)))              continue; @@ -2311,8 +2339,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case '_': -        if ((NameLen == 8 && !strcmp(NameStr, "__strdup")) || -            (NameLen == 9 && !strcmp(NameStr, "__strndup"))) { +        if (Name == "__strdup" || +            Name == "__strndup") {            if (FTy->getNumParams() < 1 ||                !isa<PointerType>(FTy->getReturnType()) ||                !isa<PointerType>(FTy->getParamType(0))) @@ -2320,19 +2348,19 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotAlias(F, 0);            setDoesNotCapture(F, 1); -        } else if (NameLen == 10 && !strcmp(NameStr, "__strtok_r")) { +        } else if (Name == "__strtok_r") {            if (FTy->getNumParams() != 3 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 2); -        } else if (NameLen == 8 && !strcmp(NameStr, "_IO_getc")) { +        } else if (Name == "_IO_getc") {            if (FTy->getNumParams() != 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if (NameLen == 8 && !strcmp(NameStr, "_IO_putc")) { +        } else if (Name == "_IO_putc") {            if (FTy->getNumParams() != 2 ||          
      !isa<PointerType>(FTy->getParamType(1)))              continue; @@ -2341,16 +2369,16 @@ bool SimplifyLibCalls::doInitialization(Module &M) {          }          break;        case 1: -        if (NameLen == 15 && !strcmp(NameStr, "\1__isoc99_scanf")) { +        if (Name == "\1__isoc99_scanf") {            if (FTy->getNumParams() < 1 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if ((NameLen == 7 && !strcmp(NameStr, "\1stat64")) || -                   (NameLen == 8 && !strcmp(NameStr, "\1lstat64")) || -                   (NameLen == 10 && !strcmp(NameStr, "\1statvfs64")) || -                   (NameLen == 16 && !strcmp(NameStr, "\1__isoc99_sscanf"))) { +        } else if (Name == "\1stat64" || +                   Name == "\1lstat64" || +                   Name == "\1statvfs64" || +                   Name == "\1__isoc99_sscanf") {            if (FTy->getNumParams() < 1 ||                !isa<PointerType>(FTy->getParamType(0)) ||                !isa<PointerType>(FTy->getParamType(1))) @@ -2358,7 +2386,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotThrow(F);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 2); -        } else if (NameLen == 8 && !strcmp(NameStr, "\1fopen64")) { +        } else if (Name == "\1fopen64") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getReturnType()) ||                !isa<PointerType>(FTy->getParamType(0)) || @@ -2368,26 +2396,26 @@ bool SimplifyLibCalls::doInitialization(Module &M) {            setDoesNotAlias(F, 0);            setDoesNotCapture(F, 1);            setDoesNotCapture(F, 2); -        } else if ((NameLen == 9 && !strcmp(NameStr, "\1fseeko64")) || -                   (NameLen == 9 && !strcmp(NameStr, "\1ftello64"))) { +        } else if (Name == "\1fseeko64" || +                   Name == "\1ftello64") {            if 
(FTy->getNumParams() == 0 ||                !isa<PointerType>(FTy->getParamType(0)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 1); -        } else if (NameLen == 10 && !strcmp(NameStr, "\1tmpfile64")) { +        } else if (Name == "\1tmpfile64") {            if (!isa<PointerType>(FTy->getReturnType()))              continue;            setDoesNotThrow(F);            setDoesNotAlias(F, 0); -        } else if ((NameLen == 8 && !strcmp(NameStr, "\1fstat64")) || -                   (NameLen == 11 && !strcmp(NameStr, "\1fstatvfs64"))) { +        } else if (Name == "\1fstat64" || +                   Name == "\1fstatvfs64") {            if (FTy->getNumParams() != 2 ||                !isa<PointerType>(FTy->getParamType(1)))              continue;            setDoesNotThrow(F);            setDoesNotCapture(F, 2); -        } else if (NameLen == 7 && !strcmp(NameStr, "\1open64")) { +        } else if (Name == "\1open64") {            if (FTy->getNumParams() < 2 ||                !isa<PointerType>(FTy->getParamType(0)))              continue; diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index c037ee960317..68689d6f13b7 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -30,8 +30,8 @@  #include "llvm/Analysis/ConstantFolding.h"  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/SmallPtrSet.h"  #include <map> @@ -45,7 +45,7 @@ TailDupThreshold("taildup-threshold",                   cl::init(1), cl::Hidden);  namespace { -  class VISIBILITY_HIDDEN TailDup : public FunctionPass { +  class TailDup : public FunctionPass {      bool runOnFunction(Function &F);    public:      static char ID; // Pass identification, replacement for typeid @@ 
-128,7 +128,7 @@ bool TailDup::shouldEliminateUnconditionalBranch(TerminatorInst *TI,      // other instructions.      if (isa<CallInst>(I) || isa<InvokeInst>(I)) return false; -    // Allso alloca and malloc. +    // Also alloca and malloc.      if (isa<AllocationInst>(I)) return false;      // Some vector instructions can expand into a number of instructions. @@ -243,13 +243,13 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {    BasicBlock *DestBlock = Branch->getSuccessor(0);    assert(SourceBlock != DestBlock && "Our predicate is broken!"); -  DOUT << "TailDuplication[" << SourceBlock->getParent()->getName() -       << "]: Eliminating branch: " << *Branch; +  DEBUG(errs() << "TailDuplication[" << SourceBlock->getParent()->getName() +        << "]: Eliminating branch: " << *Branch);    // See if we can avoid duplicating code by moving it up to a dominator of both    // blocks.    if (BasicBlock *DomBlock = FindObviousSharedDomOf(SourceBlock, DestBlock)) { -    DOUT << "Found shared dominator: " << DomBlock->getName() << "\n"; +    DEBUG(errs() << "Found shared dominator: " << DomBlock->getName() << "\n");      // If there are non-phi instructions in DestBlock that have no operands      // defined in DestBlock, and if the instruction has no side effects, we can @@ -258,7 +258,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {      while (!isa<TerminatorInst>(BBI)) {        Instruction *I = BBI++; -      bool CanHoist = !I->isTrapping() && !I->mayHaveSideEffects(); +      bool CanHoist = I->isSafeToSpeculativelyExecute() && +                      !I->mayReadFromMemory();        if (CanHoist) {          for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)            if (Instruction *OpI = dyn_cast<Instruction>(I->getOperand(op))) @@ -271,7 +272,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {            // Remove from DestBlock, move right before the term in DomBlock.            
DestBlock->getInstList().remove(I);            DomBlock->getInstList().insert(DomBlock->getTerminator(), I); -          DOUT << "Hoisted: " << *I; +          DEBUG(errs() << "Hoisted: " << *I);          }        }      } @@ -358,7 +359,8 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {        Instruction *Inst = BI++;        if (isInstructionTriviallyDead(Inst))          Inst->eraseFromParent(); -      else if (Constant *C = ConstantFoldInstruction(Inst)) { +      else if (Constant *C = ConstantFoldInstruction(Inst, +                                                     Inst->getContext())) {          Inst->replaceAllUsesWith(C);          Inst->eraseFromParent();        } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 34ee57c9b9dc..b56e17040db2 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -60,14 +60,13 @@  #include "llvm/Pass.h"  #include "llvm/Support/CFG.h"  #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h"  using namespace llvm;  STATISTIC(NumEliminated, "Number of tail calls removed");  STATISTIC(NumAccumAdded, "Number of accumulators introduced");  namespace { -  struct VISIBILITY_HIDDEN TailCallElim : public FunctionPass { +  struct TailCallElim : public FunctionPass {      static char ID; // Pass identification, replacement for typeid      TailCallElim() : FunctionPass(&ID) {} @@ -394,7 +393,7 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,    // create the new entry block, allowing us to branch back to the old entry.    
if (OldEntry == 0) {      OldEntry = &F->getEntryBlock(); -    BasicBlock *NewEntry = BasicBlock::Create("", F, OldEntry); +    BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);      NewEntry->takeName(OldEntry);      OldEntry->setName("tailrecurse");      BranchInst::Create(OldEntry, NewEntry); diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index 71049fa212d3..135a621f5d96 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -19,17 +19,18 @@  #include "llvm/Target/TargetData.h"  #include "llvm/Support/GetElementPtrTypeIterator.h"  #include "llvm/Support/PatternMatch.h" +#include "llvm/Support/raw_ostream.h"  using namespace llvm;  using namespace llvm::PatternMatch; -void ExtAddrMode::print(OStream &OS) const { +void ExtAddrMode::print(raw_ostream &OS) const {    bool NeedPlus = false;    OS << "[";    if (BaseGV) {      OS << (NeedPlus ? " + " : "")         << "GV:"; -    WriteAsOperand(*OS.stream(), BaseGV, /*PrintType=*/false); +    WriteAsOperand(OS, BaseGV, /*PrintType=*/false);      NeedPlus = true;    } @@ -39,13 +40,13 @@ void ExtAddrMode::print(OStream &OS) const {    if (BaseReg) {      OS << (NeedPlus ? " + " : "")         << "Base:"; -    WriteAsOperand(*OS.stream(), BaseReg, /*PrintType=*/false); +    WriteAsOperand(OS, BaseReg, /*PrintType=*/false);      NeedPlus = true;    }    if (Scale) {      OS << (NeedPlus ? 
" + " : "")         << Scale << "*"; -    WriteAsOperand(*OS.stream(), ScaledReg, /*PrintType=*/false); +    WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);      NeedPlus = true;    } @@ -53,8 +54,8 @@ void ExtAddrMode::print(OStream &OS) const {  }  void ExtAddrMode::dump() const { -  print(cerr); -  cerr << '\n'; +  print(errs()); +  errs() << '\n';  } @@ -205,7 +206,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,      if (!RHS) return false;      int64_t Scale = RHS->getSExtValue();      if (Opcode == Instruction::Shl) -      Scale = 1 << Scale; +      Scale = 1LL << Scale;      return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);    } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 6d1180d0dd9a..4931ab3f7fad 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -16,6 +16,7 @@  #include "llvm/Function.h"  #include "llvm/Instructions.h"  #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h"  #include "llvm/Constant.h"  #include "llvm/Type.h"  #include "llvm/Analysis/AliasAnalysis.h" @@ -23,6 +24,8 @@  #include "llvm/Analysis/Dominators.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/ValueHandle.h"  #include <algorithm>  using namespace llvm; @@ -249,11 +252,11 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {        Value *RetVal = 0;        // Create a value to return... if the function doesn't return null... -      if (BB->getParent()->getReturnType() != Type::VoidTy) +      if (BB->getParent()->getReturnType() != Type::getVoidTy(TI->getContext()))          RetVal = Constant::getNullValue(BB->getParent()->getReturnType());        // Create the return... 
-      NewTI = ReturnInst::Create(RetVal); +      NewTI = ReturnInst::Create(TI->getContext(), RetVal);      }      break; @@ -261,8 +264,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {    case Instruction::Switch:    // Should remove entry    default:    case Instruction::Ret:       // Cannot happen, has no successors! -    assert(0 && "Unhandled terminator instruction type in RemoveSuccessor!"); -    abort(); +    llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!");    }    if (NewTI)   // If it's a different instruction, replace. @@ -318,7 +320,8 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {      ++SplitIt;    BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); -  // The new block lives in whichever loop the old one did. +  // The new block lives in whichever loop the old one did. This preserves +  // LCSSA as well, because we force the split point to be after any PHI nodes.    if (LoopInfo* LI = P->getAnalysisIfAvailable<LoopInfo>())      if (Loop *L = LI->getLoopFor(Old))        L->addBasicBlockToLoop(New, LI->getBase()); @@ -352,32 +355,61 @@ BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P) {  /// Preds array, which has NumPreds elements in it.  The new block is given a  /// suffix of 'Suffix'.  /// -/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree and -/// DominanceFrontier, but no other analyses. +/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, +/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. +/// In particular, it does not preserve LoopSimplify (because it's +/// complicated to handle the case where one of the edges being split +/// is an exit of a loop with other exits). 
+///  BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,                                            BasicBlock *const *Preds,                                           unsigned NumPreds, const char *Suffix,                                           Pass *P) {    // Create new basic block, insert right before the original block. -  BasicBlock *NewBB = -    BasicBlock::Create(BB->getName()+Suffix, BB->getParent(), BB); +  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix, +                                         BB->getParent(), BB);    // The new block unconditionally branches to the old block.    BranchInst *BI = BranchInst::Create(BB, NewBB); +  LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0; +  Loop *L = LI ? LI->getLoopFor(BB) : 0; +  bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID); +    // Move the edges from Preds to point to NewBB instead of BB. -  for (unsigned i = 0; i != NumPreds; ++i) +  // While here, if we need to preserve loop analyses, collect +  // some information about how this split will affect loops. +  bool HasLoopExit = false; +  bool IsLoopEntry = !!L; +  bool SplitMakesNewLoopHeader = false; +  for (unsigned i = 0; i != NumPreds; ++i) {      Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB); -   + +    if (LI) { +      // If we need to preserve LCSSA, determine if any of +      // the preds is a loop exit. +      if (PreserveLCSSA) +        if (Loop *PL = LI->getLoopFor(Preds[i])) +          if (!PL->contains(BB)) +            HasLoopExit = true; +      // If we need to preserve LoopInfo, note whether any of the +      // preds crosses an interesting loop boundary. +      if (L) { +        if (L->contains(Preds[i])) +          IsLoopEntry = false; +        else +          SplitMakesNewLoopHeader = true; +      } +    } +  } +    // Update dominator tree and dominator frontier if available.    DominatorTree *DT = P ? 
P->getAnalysisIfAvailable<DominatorTree>() : 0;    if (DT)      DT->splitBlock(NewBB);    if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)      DF->splitBlock(NewBB); -  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; -   -   +    // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI    // node becomes an incoming value for BB's phi node.  However, if the Preds    // list is empty, we need to insert dummy entries into the PHI nodes in BB to @@ -388,20 +420,42 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,        cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);      return NewBB;    } + +  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0; + +  if (L) { +    if (IsLoopEntry) { +      if (Loop *PredLoop = LI->getLoopFor(Preds[0])) { +        // Add the new block to the nearest enclosing loop (and not an +        // adjacent loop). +        while (PredLoop && !PredLoop->contains(BB)) +          PredLoop = PredLoop->getParentLoop(); +        if (PredLoop) +          PredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); +      } +    } else { +      L->addBasicBlockToLoop(NewBB, LI->getBase()); +      if (SplitMakesNewLoopHeader) +        L->moveToHeader(NewBB); +    } +  }    // Otherwise, create a new PHI node in NewBB for each PHI node in BB.    for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {      PHINode *PN = cast<PHINode>(I++);      // Check to see if all of the values coming in are the same.  If so, we -    // don't need to create a new PHI node. -    Value *InVal = PN->getIncomingValueForBlock(Preds[0]); -    for (unsigned i = 1; i != NumPreds; ++i) -      if (InVal != PN->getIncomingValueForBlock(Preds[i])) { -        InVal = 0; -        break; -      } -     +    // don't need to create a new PHI node, unless it's needed for LCSSA. 
+    Value *InVal = 0; +    if (!HasLoopExit) { +      InVal = PN->getIncomingValueForBlock(Preds[0]); +      for (unsigned i = 1; i != NumPreds; ++i) +        if (InVal != PN->getIncomingValueForBlock(Preds[i])) { +          InVal = 0; +          break; +        } +    } +      if (InVal) {        // If all incoming values for the new PHI would be the same, just don't        // make a new PHI.  Instead, just remove the incoming values from the old @@ -426,16 +480,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,      // Add an incoming value to the PHI node in the loop for the preheader      // edge.      PN->addIncoming(InVal, NewBB); -     -    // Check to see if we can eliminate this phi node. -    if (Value *V = PN->hasConstantValue(DT != 0)) { -      Instruction *I = dyn_cast<Instruction>(V); -      if (!I || DT == 0 || DT->dominates(I, PN)) { -        PN->replaceAllUsesWith(V); -        if (AA) AA->deleteValue(PN); -        PN->eraseFromParent(); -      } -    }    }    return NewBB; @@ -503,11 +547,15 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) {    // Test if the values are trivially equivalent.    if (A == B) return true; -  // Test if the values come form identical arithmetic instructions. +  // Test if the values come from identical arithmetic instructions. +  // Use isIdenticalToWhenDefined instead of isIdenticalTo because +  // this function is only used when one address use dominates the +  // other, which means that they'll always either have the same +  // value or one of them will have an undefined value.    if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||        isa<PHINode>(A) || isa<GetElementPtrInst>(A))      if (const Instruction *BI = dyn_cast<Instruction>(B)) -      if (cast<Instruction>(A)->isIdenticalTo(BI)) +      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))          return true;    // Otherwise they may not be equivalent. 
@@ -537,7 +585,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,    unsigned AccessSize = 0;    if (AA) {      const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType(); -    AccessSize = AA->getTargetData().getTypeStoreSizeInBits(AccessTy); +    AccessSize = AA->getTypeStoreSize(AccessTy);    }    while (ScanFrom != ScanBB->begin()) { diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp index 1650cfa30653..4b720b1e323c 100644 --- a/lib/Transforms/Utils/BasicInliner.cpp +++ b/lib/Transforms/Utils/BasicInliner.cpp @@ -13,7 +13,6 @@  //===----------------------------------------------------------------------===//  #define DEBUG_TYPE "basicinliner" -  #include "llvm/Module.h"  #include "llvm/Function.h"  #include "llvm/Transforms/Utils/BasicInliner.h" @@ -21,6 +20,7 @@  #include "llvm/Support/CallSite.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/SmallPtrSet.h"  #include <vector> @@ -89,7 +89,7 @@ void BasicInlinerImpl::inlineFunctions() {        }    } -  DOUT << ": " << CallSites.size() << " call sites.\n"; +  DEBUG(errs() << ": " << CallSites.size() << " call sites.\n");    // Inline call sites.    
bool Changed = false; @@ -109,22 +109,22 @@ void BasicInlinerImpl::inlineFunctions() {          }          InlineCost IC = CA.getInlineCost(CS, NeverInline);          if (IC.isAlways()) {         -          DOUT << "  Inlining: cost=always" -               <<", call: " << *CS.getInstruction(); +          DEBUG(errs() << "  Inlining: cost=always" +                       <<", call: " << *CS.getInstruction());          } else if (IC.isNever()) { -          DOUT << "  NOT Inlining: cost=never" -               <<", call: " << *CS.getInstruction(); +          DEBUG(errs() << "  NOT Inlining: cost=never" +                       <<", call: " << *CS.getInstruction());            continue;          } else {            int Cost = IC.getValue();            if (Cost >= (int) BasicInlineThreshold) { -            DOUT << "  NOT Inlining: cost = " << Cost -                 << ", call: " <<  *CS.getInstruction(); +            DEBUG(errs() << "  NOT Inlining: cost = " << Cost +                         << ", call: " <<  *CS.getInstruction());              continue;            } else { -            DOUT << "  Inlining: cost = " << Cost -                 << ", call: " <<  *CS.getInstruction(); +            DEBUG(errs() << "  Inlining: cost = " << Cost +                         << ", call: " <<  *CS.getInstruction());            }          } diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index c4fd1eae43cd..849b2b5d5cd6 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -21,11 +21,13 @@  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Analysis/Dominators.h"  #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileInfo.h"  #include "llvm/Function.h"  #include "llvm/Instructions.h"  #include "llvm/Type.h"  #include "llvm/Support/CFG.h"  #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h"  #include "llvm/ADT/SmallVector.h" 
 #include "llvm/ADT/Statistic.h"  using namespace llvm; @@ -43,6 +45,7 @@ namespace {        AU.addPreserved<DominatorTree>();        AU.addPreserved<DominanceFrontier>();        AU.addPreserved<LoopInfo>(); +      AU.addPreserved<ProfileInfo>();        // No loop canonicalization guarantees are broken by this pass.        AU.addPreservedID(LoopSimplifyID); @@ -114,6 +117,38 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,    return false;  } +/// CreatePHIsForSplitLoopExit - When a loop exit edge is split, LCSSA form +/// may require new PHIs in the new exit block. This function inserts the +/// new PHIs, as needed.  Preds is a list of preds inside the loop, SplitBB +/// is the new loop exit block, and DestBB is the old loop exit, now the +/// successor of SplitBB. +static void CreatePHIsForSplitLoopExit(SmallVectorImpl<BasicBlock *> &Preds, +                                       BasicBlock *SplitBB, +                                       BasicBlock *DestBB) { +  // SplitBB shouldn't have anything non-trivial in it yet. +  assert(SplitBB->getFirstNonPHI() == SplitBB->getTerminator() && +         "SplitBB has non-PHI nodes!"); + +  // For each PHI in the destination block... +  for (BasicBlock::iterator I = DestBB->begin(); +       PHINode *PN = dyn_cast<PHINode>(I); ++I) { +    unsigned Idx = PN->getBasicBlockIndex(SplitBB); +    Value *V = PN->getIncomingValue(Idx); +    // If the input is a PHI which already satisfies LCSSA, don't create +    // a new one. +    if (const PHINode *VP = dyn_cast<PHINode>(V)) +      if (VP->getParent() == SplitBB) +        continue; +    // Otherwise a new PHI is needed. Create one and populate it. +    PHINode *NewPN = PHINode::Create(PN->getType(), "split", +                                     SplitBB->getTerminator()); +    for (unsigned i = 0, e = Preds.size(); i != e; ++i) +      NewPN->addIncoming(V, Preds[i]); +    // Update the original PHI. 
+    PN->setIncomingValue(Idx, NewPN); +  } +} +  /// SplitCriticalEdge - If this edge is a critical edge, insert a new node to  /// split the critical edge.  This will update DominatorTree and  /// DominatorFrontier  information if it is available, thus calling this pass @@ -121,15 +156,15 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,  /// false otherwise.  This ensures that all edges to that dest go to one block  /// instead of each going to a different block.  // -bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, -                             bool MergeIdenticalEdges) { -  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return false; +BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, +                                    Pass *P, bool MergeIdenticalEdges) { +  if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;    BasicBlock *TIBB = TI->getParent();    BasicBlock *DestBB = TI->getSuccessor(SuccNum);    // Create a new basic block, linking it into the CFG. -  BasicBlock *NewBB = BasicBlock::Create(TIBB->getName() + "." + -                                         DestBB->getName() + "_crit_edge"); +  BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), +                      TIBB->getName() + "." + DestBB->getName() + "_crit_edge");    // Create our unconditional branch...    BranchInst::Create(DestBB, NewBB); @@ -171,7 +206,7 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,    // If we don't have a pass object, we can't update anything... -  if (P == 0) return true; +  if (P == 0) return NewBB;    // Now update analysis information.  Since the only predecessor of NewBB is    // the TIBB, TIBB clearly dominates NewBB.  TIBB usually doesn't dominate @@ -222,8 +257,8 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,      // If NewBBDominatesDestBB hasn't been computed yet, do so with DF.      
if (!OtherPreds.empty()) {        // FIXME: IMPLEMENT THIS! -      assert(0 && "Requiring domfrontiers but not idom/domtree/domset." -             " not implemented yet!"); +      llvm_unreachable("Requiring domfrontiers but not idom/domtree/domset." +                       " not implemented yet!");      }      // Since the new block is dominated by its only predecessor TIBB, @@ -253,9 +288,9 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,    // Update LoopInfo if it is around.    if (LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>()) { -    // If one or the other blocks were not in a loop, the new block is not -    // either, and thus LI doesn't need to be updated. -    if (Loop *TIL = LI->getLoopFor(TIBB)) +    if (Loop *TIL = LI->getLoopFor(TIBB)) { +      // If one or the other blocks were not in a loop, the new block is not +      // either, and thus LI doesn't need to be updated.        if (Loop *DestLoop = LI->getLoopFor(DestBB)) {          if (TIL == DestLoop) {            // Both in the same loop, the NewBB joins loop. @@ -277,6 +312,65 @@ bool llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P,              P->addBasicBlockToLoop(NewBB, LI->getBase());          }        } +      // If TIBB is in a loop and DestBB is outside of that loop, split the +      // other exit blocks of the loop that also have predecessors outside +      // the loop, to maintain a LoopSimplify guarantee. +      if (!TIL->contains(DestBB) && +          P->mustPreserveAnalysisID(LoopSimplifyID)) { +        assert(!TIL->contains(NewBB) && +               "Split point for loop exit is contained in loop!"); + +        // Update LCSSA form in the newly created exit block. +        if (P->mustPreserveAnalysisID(LCSSAID)) { +          SmallVector<BasicBlock *, 1> OrigPred; +          OrigPred.push_back(TIBB); +          CreatePHIsForSplitLoopExit(OrigPred, NewBB, DestBB); +        } + +        // For each unique exit block... 
+        SmallVector<BasicBlock *, 4> ExitBlocks; +        TIL->getExitBlocks(ExitBlocks); +        for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { +          // Collect all the preds that are inside the loop, and note +          // whether there are any preds outside the loop. +          SmallVector<BasicBlock *, 4> Preds; +          bool HasPredOutsideOfLoop = false; +          BasicBlock *Exit = ExitBlocks[i]; +          for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); +               I != E; ++I) +            if (TIL->contains(*I)) +              Preds.push_back(*I); +            else +              HasPredOutsideOfLoop = true; +          // If there are any preds not in the loop, we'll need to split +          // the edges. The Preds.empty() check is needed because a block +          // may appear multiple times in the list. We can't use +          // getUniqueExitBlocks above because that depends on LoopSimplify +          // form, which we're in the process of restoring! +          if (!Preds.empty() && HasPredOutsideOfLoop) { +            BasicBlock *NewExitBB = +              SplitBlockPredecessors(Exit, Preds.data(), Preds.size(), +                                     "split", P); +            if (P->mustPreserveAnalysisID(LCSSAID)) +              CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit); +          } +        } +      } +      // LCSSA form was updated above for the case where LoopSimplify is +      // available, which means that all predecessors of loop exit blocks +      // are within the loop. Without LoopSimplify form, it would be +      // necessary to insert a new phi. +      assert((!P->mustPreserveAnalysisID(LCSSAID) || +              P->mustPreserveAnalysisID(LoopSimplifyID)) && +             "SplitCriticalEdge doesn't know how to update LCCSA form " +             "without LoopSimplify!"); +    }    } -  return true; + +  // Update ProfileInfo if it is around. 
+  if (ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>()) { +    PI->splitEdge(TIBB,DestBB,NewBB,MergeIdenticalEdges); +  } + +  return NewBB;  } diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 10cae5ca7087..f4394ea64d6e 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -6,11 +6,10 @@ add_llvm_library(LLVMTransformUtils    CloneFunction.cpp    CloneLoop.cpp    CloneModule.cpp -  CloneTrace.cpp    CodeExtractor.cpp    DemoteRegToStack.cpp -  InlineCost.cpp    InlineFunction.cpp +  InstructionNamer.cpp    LCSSA.cpp    Local.cpp    LoopSimplify.cpp @@ -19,12 +18,12 @@ add_llvm_library(LLVMTransformUtils    LowerSwitch.cpp    Mem2Reg.cpp    PromoteMemoryToRegister.cpp -  SimplifyCFG.cpp +  SSAUpdater.cpp    SSI.cpp +  SimplifyCFG.cpp    UnifyFunctionExitNodes.cpp    UnrollLoop.cpp    ValueMapper.cpp -  InstructionNamer.cpp    )  target_link_libraries (LLVMTransformUtils LLVMSupport) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index d0fdefa3f689..30130fa0a126 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -20,6 +20,7 @@  #include "llvm/IntrinsicInst.h"  #include "llvm/GlobalVariable.h"  #include "llvm/Function.h" +#include "llvm/LLVMContext.h"  #include "llvm/Support/CFG.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Transforms/Utils/ValueMapper.h" @@ -34,7 +35,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,                                    DenseMap<const Value*, Value*> &ValueMap,                                    const char *NameSuffix, Function *F,                                    ClonedCodeInfo *CodeInfo) { -  BasicBlock *NewBB = BasicBlock::Create("", F); +  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);    if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);    bool hasCalls = false, hasDynamicAllocas = false, 
hasStaticAllocas = false; @@ -72,7 +73,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,  //  void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,                               DenseMap<const Value*, Value*> &ValueMap, -                             std::vector<ReturnInst*> &Returns, +                             SmallVectorImpl<ReturnInst*> &Returns,                               const char *NameSuffix, ClonedCodeInfo *CodeInfo) {    assert(NameSuffix && "NameSuffix cannot be null!"); @@ -165,7 +166,7 @@ Function *llvm::CloneFunction(const Function *F,        ValueMap[I] = DestI++;        // Add mapping to ValueMap      } -  std::vector<ReturnInst*> Returns;  // Ignore returns cloned... +  SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.    CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo);    return NewF;  } @@ -179,7 +180,7 @@ namespace {      Function *NewFunc;      const Function *OldFunc;      DenseMap<const Value*, Value*> &ValueMap; -    std::vector<ReturnInst*> &Returns; +    SmallVectorImpl<ReturnInst*> &Returns;      const char *NameSuffix;      ClonedCodeInfo *CodeInfo;      const TargetData *TD; @@ -187,7 +188,7 @@ namespace {    public:      PruningFunctionCloner(Function *newFunc, const Function *oldFunc,                            DenseMap<const Value*, Value*> &valueMap, -                          std::vector<ReturnInst*> &returns, +                          SmallVectorImpl<ReturnInst*> &returns,                            const char *nameSuffix,                             ClonedCodeInfo *codeInfo,                            const TargetData *td) @@ -218,7 +219,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,    // Nope, clone it now.    
BasicBlock *NewBB; -  BBEntry = NewBB = BasicBlock::Create(); +  BBEntry = NewBB = BasicBlock::Create(BB->getContext());    if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);    bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; @@ -237,7 +238,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,      // Do not clone llvm.dbg.region.end. It will be adjusted by the inliner.      if (const DbgFuncStartInst *DFSI = dyn_cast<DbgFuncStartInst>(II)) {        if (DbgFnStart == NULL) { -        DISubprogram SP(cast<GlobalVariable>(DFSI->getSubprogram())); +        DISubprogram SP(DFSI->getSubprogram());          if (SP.describes(BB->getParent()))            DbgFnStart = DFSI->getSubprogram();        } @@ -323,17 +324,21 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,  /// mapping its operands through ValueMap if they are available.  Constant *PruningFunctionCloner::  ConstantFoldMappedInstruction(const Instruction *I) { +  LLVMContext &Context = I->getContext(); +      SmallVector<Constant*, 8> Ops;    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)      if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i), -                                                           ValueMap))) +                                                           ValueMap, +                                                           Context)))        Ops.push_back(Op);      else        return 0;  // All operands not constant!    
if (const CmpInst *CI = dyn_cast<CmpInst>(I))      return ConstantFoldCompareInstOperands(CI->getPredicate(), -                                           &Ops[0], Ops.size(), TD); +                                           &Ops[0], Ops.size(),  +                                           Context, TD);    if (const LoadInst *LI = dyn_cast<LoadInst>(I))      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) @@ -344,7 +349,7 @@ ConstantFoldMappedInstruction(const Instruction *I) {                                                            CE);    return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0], -                                  Ops.size(), TD); +                                  Ops.size(), Context, TD);  }  /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, @@ -356,11 +361,12 @@ ConstantFoldMappedInstruction(const Instruction *I) {  /// used for things like CloneFunction or CloneModule.  void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,                                       DenseMap<const Value*, Value*> &ValueMap, -                                     std::vector<ReturnInst*> &Returns, +                                     SmallVectorImpl<ReturnInst*> &Returns,                                       const char *NameSuffix,                                        ClonedCodeInfo *CodeInfo,                                       const TargetData *TD) {    assert(NameSuffix && "NameSuffix cannot be null!"); +  LLVMContext &Context = OldFunc->getContext();  #ifndef NDEBUG    for (Function::const_arg_iterator II = OldFunc->arg_begin(),  @@ -385,7 +391,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,    // insert it into the new function in the right order.  If not, ignore it.    //    // Defer PHI resolution until rest of function is resolved. 
-  std::vector<const PHINode*> PHIToResolve; +  SmallVector<const PHINode*, 16> PHIToResolve;    for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();         BI != BE; ++BI) {      BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]); @@ -430,7 +436,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,        for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {          if (BasicBlock *MappedBlock =               cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) { -          Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap); +          Value *InVal = MapValue(PN->getIncomingValue(pred), +                                  ValueMap, Context);            assert(InVal && "Unknown input value?");            PN->setIncomingValue(pred, InVal);            PN->setIncomingBlock(pred, MappedBlock); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 82f5b93a9544..0285f8c8d107 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -56,10 +56,11 @@ Module *llvm::CloneModule(const Module *M,    //    for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();         I != E; ++I) { -    GlobalVariable *GV = new GlobalVariable(I->getType()->getElementType(), +    GlobalVariable *GV = new GlobalVariable(*New,  +                                            I->getType()->getElementType(),                                              false,                                              GlobalValue::ExternalLinkage, 0, -                                            I->getName(), New); +                                            I->getName());      GV->setAlignment(I->getAlignment());      ValueMap[I] = GV;    } @@ -88,7 +89,8 @@ Module *llvm::CloneModule(const Module *M,      GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);      if (I->hasInitializer())        
GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(), -                                                 ValueMap))); +                                                 ValueMap, +                                                 M->getContext())));      GV->setLinkage(I->getLinkage());      GV->setThreadLocal(I->isThreadLocal());      GV->setConstant(I->isConstant()); @@ -106,7 +108,7 @@ Module *llvm::CloneModule(const Module *M,          ValueMap[J] = DestI++;        } -      std::vector<ReturnInst*> Returns;  // Ignore returns cloned... +      SmallVector<ReturnInst*, 8> Returns;  // Ignore returns cloned.        CloneFunctionInto(F, I, ValueMap, Returns);      } @@ -119,7 +121,7 @@ Module *llvm::CloneModule(const Module *M,      GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);      GA->setLinkage(I->getLinkage());      if (const Constant* C = I->getAliasee()) -      GA->setAliasee(cast<Constant>(MapValue(C, ValueMap))); +      GA->setAliasee(cast<Constant>(MapValue(C, ValueMap, M->getContext())));    }    return New; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 6d5904e30886..c39ccf7d3f45 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -18,6 +18,7 @@  #include "llvm/DerivedTypes.h"  #include "llvm/Instructions.h"  #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Analysis/Dominators.h" @@ -27,6 +28,8 @@  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/ADT/StringExtras.h"  #include <algorithm>  #include <set> @@ -180,8 +183,24 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) {  void CodeExtractor::splitReturnBlocks() {    for (std::set<BasicBlock*>::iterator I = BlocksToExtract.begin(),           E = 
BlocksToExtract.end(); I != E; ++I) -    if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) -      (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); +    if (ReturnInst *RI = dyn_cast<ReturnInst>((*I)->getTerminator())) { +      BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); +      if (DT) { +        // Old dominates New. New node dominates all other nodes dominated +        // by Old. +        DomTreeNode *OldNode = DT->getNode(*I); +        SmallVector<DomTreeNode*, 8> Children; +        for (DomTreeNode::iterator DI = OldNode->begin(), DE = OldNode->end(); +             DI != DE; ++DI)  +          Children.push_back(*DI); + +        DomTreeNode *NewNode = DT->addNewBlock(New, *I); + +        for (SmallVector<DomTreeNode*, 8>::iterator I = Children.begin(), +               E = Children.end(); I != E; ++I)  +          DT->changeImmediateDominator(*I, NewNode); +      } +    }  }  // findInputsOutputs - Find inputs to, outputs from the code region. @@ -234,15 +253,15 @@ Function *CodeExtractor::constructFunction(const Values &inputs,                                             BasicBlock *newHeader,                                             Function *oldFunction,                                             Module *M) { -  DOUT << "inputs: " << inputs.size() << "\n"; -  DOUT << "outputs: " << outputs.size() << "\n"; +  DEBUG(errs() << "inputs: " << inputs.size() << "\n"); +  DEBUG(errs() << "outputs: " << outputs.size() << "\n");    // This function returns unsigned, outputs will go back by reference.
switch (NumExitBlocks) {    case 0: -  case 1: RetTy = Type::VoidTy; break; -  case 2: RetTy = Type::Int1Ty; break; -  default: RetTy = Type::Int16Ty; break; +  case 1: RetTy = Type::getVoidTy(header->getContext()); break; +  case 2: RetTy = Type::getInt1Ty(header->getContext()); break; +  default: RetTy = Type::getInt16Ty(header->getContext()); break;    }    std::vector<const Type*> paramTy; @@ -251,32 +270,34 @@ Function *CodeExtractor::constructFunction(const Values &inputs,    for (Values::const_iterator i = inputs.begin(),           e = inputs.end(); i != e; ++i) {      const Value *value = *i; -    DOUT << "value used in func: " << *value << "\n"; +    DEBUG(errs() << "value used in func: " << *value << "\n");      paramTy.push_back(value->getType());    }    // Add the types of the output values to the function's argument list.    for (Values::const_iterator I = outputs.begin(), E = outputs.end();         I != E; ++I) { -    DOUT << "instr used in func: " << **I << "\n"; +    DEBUG(errs() << "instr used in func: " << **I << "\n");      if (AggregateArgs)        paramTy.push_back((*I)->getType());      else        paramTy.push_back(PointerType::getUnqual((*I)->getType()));    } -  DOUT << "Function type: " << *RetTy << " f("; +  DEBUG(errs() << "Function type: " << *RetTy << " f(");    for (std::vector<const Type*>::iterator i = paramTy.begin(),           e = paramTy.end(); i != e; ++i) -    DOUT << **i << ", "; -  DOUT << ")\n"; +    DEBUG(errs() << **i << ", "); +  DEBUG(errs() << ")\n");    if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { -    PointerType *StructPtr = PointerType::getUnqual(StructType::get(paramTy)); +    PointerType *StructPtr = +           PointerType::getUnqual(StructType::get(M->getContext(), paramTy));      paramTy.clear();      paramTy.push_back(StructPtr);    } -  const FunctionType *funcType = FunctionType::get(RetTy, paramTy, false); +  const FunctionType *funcType = +                  FunctionType::get(RetTy, 
paramTy, false);    // Create the new function    Function *newFunction = Function::Create(funcType, @@ -298,13 +319,13 @@ Function *CodeExtractor::constructFunction(const Values &inputs,      Value *RewriteVal;      if (AggregateArgs) {        Value *Idx[2]; -      Idx[0] = Constant::getNullValue(Type::Int32Ty); -      Idx[1] = ConstantInt::get(Type::Int32Ty, i); -      std::string GEPname = "gep_" + inputs[i]->getName(); +      Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); +      Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);        TerminatorInst *TI = newFunction->begin()->getTerminator(); -      GetElementPtrInst *GEP = GetElementPtrInst::Create(AI, Idx, Idx+2,  -                                                         GEPname, TI); -      RewriteVal = new LoadInst(GEP, "load" + GEPname, TI); +      GetElementPtrInst *GEP =  +        GetElementPtrInst::Create(AI, Idx, Idx+2,  +                                  "gep_" + inputs[i]->getName(), TI); +      RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI);      } else        RewriteVal = AI++; @@ -340,6 +361,20 @@ Function *CodeExtractor::constructFunction(const Values &inputs,    return newFunction;  } +/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI +/// that uses the value within the basic block, and return the predecessor +/// block associated with that use, or return 0 if none is found. +static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { +  for (Value::use_iterator UI = Used->use_begin(), +       UE = Used->use_end(); UI != UE; ++UI) { +     PHINode *P = dyn_cast<PHINode>(*UI); +     if (P && P->getParent() == BB) +       return P->getIncomingBlock(UI); +  } +   +  return 0; +} +  /// emitCallAndSwitchStatement - This method sets up the caller side by adding  /// the call instruction, splitting any PHI nodes in the header block as  /// necessary. 
@@ -348,7 +383,9 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,                             Values &inputs, Values &outputs) {    // Emit a call to the new function, passing in: *pointer to struct (if    // aggregating parameters), or plan inputs and allocated memory for outputs -  std::vector<Value*> params, StructValues, ReloadOutputs; +  std::vector<Value*> params, StructValues, ReloadOutputs, Reloads; +   +  LLVMContext &Context = newFunction->getContext();    // Add inputs as params, or to be filled into the struct    for (Values::iterator i = inputs.begin(), e = inputs.end(); i != e; ++i) @@ -378,7 +415,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,        ArgTypes.push_back((*v)->getType());      // Allocate a struct at the beginning of this function -    Type *StructArgTy = StructType::get(ArgTypes); +    Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);      Struct =        new AllocaInst(StructArgTy, 0, "structArg",                       codeReplacer->getParent()->begin()->begin()); @@ -386,8 +423,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,      for (unsigned i = 0, e = inputs.size(); i != e; ++i) {        Value *Idx[2]; -      Idx[0] = Constant::getNullValue(Type::Int32Ty); -      Idx[1] = ConstantInt::get(Type::Int32Ty, i); +      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); +      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);        GetElementPtrInst *GEP =          GetElementPtrInst::Create(Struct, Idx, Idx + 2,                                    "gep_" + StructValues[i]->getName()); @@ -412,8 +449,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,      Value *Output = 0;      if (AggregateArgs) {        Value *Idx[2]; -      Idx[0] = Constant::getNullValue(Type::Int32Ty); -      Idx[1] = ConstantInt::get(Type::Int32Ty, FirstOut + i); +      Idx[0] = 
Constant::getNullValue(Type::getInt32Ty(Context)); +      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);        GetElementPtrInst *GEP          = GetElementPtrInst::Create(Struct, Idx, Idx + 2,                                      "gep_reload_" + outputs[i]->getName()); @@ -423,6 +460,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,        Output = ReloadOutputs[i];      }      LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); +    Reloads.push_back(load);      codeReplacer->getInstList().push_back(load);      std::vector<User*> Users(outputs[i]->use_begin(), outputs[i]->use_end());      for (unsigned u = 0, e = Users.size(); u != e; ++u) { @@ -434,7 +472,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,    // Now we can emit a switch statement using the call as a value.    SwitchInst *TheSwitch = -      SwitchInst::Create(ConstantInt::getNullValue(Type::Int16Ty), +      SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)),                           codeReplacer, 0, codeReplacer);    // Since there may be multiple exits from the original region, make the new @@ -456,7 +494,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,          if (!NewTarget) {            // If we don't already have an exit stub for this non-extracted            // destination, create one now! -          NewTarget = BasicBlock::Create(OldTarget->getName() + ".exitStub", +          NewTarget = BasicBlock::Create(Context, +                                         OldTarget->getName() + ".exitStub",                                           newFunction);            unsigned SuccNum = switchVal++; @@ -465,17 +504,18 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,            case 0:            case 1: break;  // No value needed.            
case 2:         // Conditional branch, return a bool -            brVal = ConstantInt::get(Type::Int1Ty, !SuccNum); +            brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);              break;            default: -            brVal = ConstantInt::get(Type::Int16Ty, SuccNum); +            brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);              break;            } -          ReturnInst *NTRet = ReturnInst::Create(brVal, NewTarget); +          ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget);            // Update the switch instruction. -          TheSwitch->addCase(ConstantInt::get(Type::Int16Ty, SuccNum), +          TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), +                                              SuccNum),                               OldTarget);            // Restore values just before we exit @@ -507,14 +547,25 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,                  DominatesDef = false;              } -            if (DT) +            if (DT) {                DominatesDef = DT->dominates(DefBlock, OldTarget); +               +              // If the output value is used by a phi in the target block, +              // then we need to test for dominance of the phi's predecessor +              // instead.  Unfortunately, this is a little complicated since we +              // have already rewritten uses of the value to uses of the reload.
+              BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out],  +                                                          OldTarget); +              if (pred && DT && DT->dominates(DefBlock, pred)) +                DominatesDef = true; +            }              if (DominatesDef) {                if (AggregateArgs) {                  Value *Idx[2]; -                Idx[0] = Constant::getNullValue(Type::Int32Ty); -                Idx[1] = ConstantInt::get(Type::Int32Ty,FirstOut+out); +                Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); +                Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), +                                          FirstOut+out);                  GetElementPtrInst *GEP =                    GetElementPtrInst::Create(OAI, Idx, Idx + 2,                                              "gep_" + outputs[out]->getName(), @@ -543,15 +594,16 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,      // this should be rewritten as a `ret'      // Check if the function should return a value -    if (OldFnRetTy == Type::VoidTy) { -      ReturnInst::Create(0, TheSwitch);  // Return void +    if (OldFnRetTy == Type::getVoidTy(Context)) { +      ReturnInst::Create(Context, 0, TheSwitch);  // Return void      } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {        // return what we have -      ReturnInst::Create(TheSwitch->getCondition(), TheSwitch); +      ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);      } else {        // Otherwise we must have code extracted an unwind or something, just        // return whatever we want. 
-      ReturnInst::Create(Constant::getNullValue(OldFnRetTy), TheSwitch); +      ReturnInst::Create(Context,  +                         Constant::getNullValue(OldFnRetTy), TheSwitch);      }      TheSwitch->eraseFromParent(); @@ -644,12 +696,14 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {    Function *oldFunction = header->getParent();    // This takes place of the original loop -  BasicBlock *codeReplacer = BasicBlock::Create("codeRepl", oldFunction, +  BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(),  +                                                "codeRepl", oldFunction,                                                  header);    // The new function needs a root node because other nodes can branch to the    // head of the region, but the entry node of a function cannot have preds. -  BasicBlock *newFuncRoot = BasicBlock::Create("newFuncRoot"); +  BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(),  +                                               "newFuncRoot");    newFuncRoot->getInstList().push_back(BranchInst::Create(header));    // Find inputs to, outputs from the code region. @@ -702,7 +756,8 @@ ExtractCodeRegion(const std::vector<BasicBlock*> &code) {    //  cerr << "OLD FUNCTION: " << *oldFunction;    //  verifyFunction(*oldFunction); -  DEBUG(if (verifyFunction(*newFunction)) abort()); +  DEBUG(if (verifyFunction(*newFunction))  +        llvm_report_error("verifyFunction failed!"));    return newFunction;  } diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index b8dd75413342..c908b4a55914 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -39,7 +39,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,    // Create a stack slot to hold the value.    
AllocaInst *Slot;    if (AllocaPoint) { -    Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", AllocaPoint); +    Slot = new AllocaInst(I.getType(), 0, +                          I.getName()+".reg2mem", AllocaPoint);    } else {      Function *F = I.getParent()->getParent();      Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", @@ -116,7 +117,8 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {    // Create a stack slot to hold the value.    AllocaInst *Slot;    if (AllocaPoint) { -    Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", AllocaPoint); +    Slot = new AllocaInst(P->getType(), 0, +                          P->getName()+".reg2mem", AllocaPoint);    } else {      Function *F = P->getParent()->getParent();      Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 4989c00ceb81..0d00d69c8cb9 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -15,6 +15,7 @@  #include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Constants.h"  #include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Instructions.h"  #include "llvm/IntrinsicInst.h" @@ -28,13 +29,73 @@  #include "llvm/Support/CallSite.h"  using namespace llvm; -bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD) { -  return InlineFunction(CallSite(CI), CG, TD); +bool llvm::InlineFunction(CallInst *CI, CallGraph *CG, const TargetData *TD, +                          SmallVectorImpl<AllocaInst*> *StaticAllocas) { +  return InlineFunction(CallSite(CI), CG, TD, StaticAllocas);  } -bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) { -  return InlineFunction(CallSite(II), CG, TD); +bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD, +                          
SmallVectorImpl<AllocaInst*> *StaticAllocas) { +  return InlineFunction(CallSite(II), CG, TD, StaticAllocas);  } + +/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into +/// an invoke, we have to turn all of the calls that can throw into +/// invokes.  This function analyze BB to see if there are any calls, and if so, +/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI +/// nodes in that block with the values specified in InvokeDestPHIValues. +/// +static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, +                                                   BasicBlock *InvokeDest, +                           const SmallVectorImpl<Value*> &InvokeDestPHIValues) { +  for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { +    Instruction *I = BBI++; +     +    // We only need to check for function calls: inlined invoke +    // instructions require no special handling. +    CallInst *CI = dyn_cast<CallInst>(I); +    if (CI == 0) continue; +     +    // If this call cannot unwind, don't convert it to an invoke. +    if (CI->doesNotThrow()) +      continue; +     +    // Convert this function call into an invoke instruction. +    // First, split the basic block. +    BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); +     +    // Next, create the new invoke instruction, inserting it at the end +    // of the old basic block. +    SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end()); +    InvokeInst *II = +      InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest, +                         InvokeArgs.begin(), InvokeArgs.end(), +                         CI->getName(), BB->getTerminator()); +    II->setCallingConv(CI->getCallingConv()); +    II->setAttributes(CI->getAttributes()); +     +    // Make sure that anything using the call now uses the invoke!  This also +    // updates the CallGraph if present. 
+    CI->replaceAllUsesWith(II); +     +    // Delete the unconditional branch inserted by splitBasicBlock +    BB->getInstList().pop_back(); +    Split->getInstList().pop_front();  // Delete the original call +     +    // Update any PHI nodes in the exceptional block to indicate that +    // there is now a new entry in them. +    unsigned i = 0; +    for (BasicBlock::iterator I = InvokeDest->begin(); +         isa<PHINode>(I); ++I, ++i) +      cast<PHINode>(I)->addIncoming(InvokeDestPHIValues[i], BB); +     +    // This basic block is now complete, the caller will continue scanning the +    // next one. +    return; +  } +} +   +  /// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls  /// in the body of the inlined function into invokes and turn unwind  /// instructions into branches to the invoke unwind dest. @@ -43,10 +104,9 @@ bool llvm::InlineFunction(InvokeInst *II, CallGraph *CG, const TargetData *TD) {  /// block of the inlined code (the last block is the end of the function),  /// and InlineCodeInfo is information about the code that got inlined.  static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, -                                ClonedCodeInfo &InlinedCodeInfo, -                                CallGraph *CG) { +                                ClonedCodeInfo &InlinedCodeInfo) {    BasicBlock *InvokeDest = II->getUnwindDest(); -  std::vector<Value*> InvokeDestPHIValues; +  SmallVector<Value*, 8> InvokeDestPHIValues;    // If there are PHI nodes in the unwind destination block, we need to    // keep track of which values came into them from this invoke, then remove @@ -62,92 +122,39 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,    // The inlined code is currently at the end of the function, scan from the    // start of the inlined code to its end, checking for stuff we need to -  // rewrite. 
-  if (InlinedCodeInfo.ContainsCalls || InlinedCodeInfo.ContainsUnwinds) { -    for (Function::iterator BB = FirstNewBlock, E = Caller->end(); -         BB != E; ++BB) { -      if (InlinedCodeInfo.ContainsCalls) { -        for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ){ -          Instruction *I = BBI++; - -          // We only need to check for function calls: inlined invoke -          // instructions require no special handling. -          if (!isa<CallInst>(I)) continue; -          CallInst *CI = cast<CallInst>(I); - -          // If this call cannot unwind, don't convert it to an invoke. -          if (CI->doesNotThrow()) -            continue; - -          // Convert this function call into an invoke instruction. -          // First, split the basic block. -          BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); - -          // Next, create the new invoke instruction, inserting it at the end -          // of the old basic block. -          SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end()); -          InvokeInst *II = -            InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest, -                               InvokeArgs.begin(), InvokeArgs.end(), -                               CI->getName(), BB->getTerminator()); -          II->setCallingConv(CI->getCallingConv()); -          II->setAttributes(CI->getAttributes()); - -          // Make sure that anything using the call now uses the invoke! -          CI->replaceAllUsesWith(II); - -          // Update the callgraph. -          if (CG) { -            // We should be able to do this: -            //   (*CG)[Caller]->replaceCallSite(CI, II); -            // but that fails if the old call site isn't in the call graph, -            // which, because of LLVM bug 3601, it sometimes isn't. 
-            CallGraphNode *CGN = (*CG)[Caller]; -            for (CallGraphNode::iterator NI = CGN->begin(), NE = CGN->end(); -                 NI != NE; ++NI) { -              if (NI->first == CI) { -                NI->first = II; -                break; -              } -            } -          } - -          // Delete the unconditional branch inserted by splitBasicBlock -          BB->getInstList().pop_back(); -          Split->getInstList().pop_front();  // Delete the original call - -          // Update any PHI nodes in the exceptional block to indicate that -          // there is now a new entry in them. -          unsigned i = 0; -          for (BasicBlock::iterator I = InvokeDest->begin(); -               isa<PHINode>(I); ++I, ++i) { -            PHINode *PN = cast<PHINode>(I); -            PN->addIncoming(InvokeDestPHIValues[i], BB); -          } - -          // This basic block is now complete, start scanning the next one. -          break; -        } -      } - -      if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { -        // An UnwindInst requires special handling when it gets inlined into an -        // invoke site.  Once this happens, we know that the unwind would cause -        // a control transfer to the invoke exception destination, so we can -        // transform it into a direct branch to the exception destination. -        BranchInst::Create(InvokeDest, UI); - -        // Delete the unwind instruction! -        UI->eraseFromParent(); - -        // Update any PHI nodes in the exceptional block to indicate that -        // there is now a new entry in them. -        unsigned i = 0; -        for (BasicBlock::iterator I = InvokeDest->begin(); -             isa<PHINode>(I); ++I, ++i) { -          PHINode *PN = cast<PHINode>(I); -          PN->addIncoming(InvokeDestPHIValues[i], BB); -        } +  // rewrite.  If the code doesn't have calls or unwinds, we know there is +  // nothing to rewrite. 
+  if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) { +    // Now that everything is happy, we have one final detail.  The PHI nodes in +    // the exception destination block still have entries due to the original +    // invoke instruction.  Eliminate these entries (which might even delete the +    // PHI node) now. +    InvokeDest->removePredecessor(II->getParent()); +    return; +  } +   +  for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ +    if (InlinedCodeInfo.ContainsCalls) +      HandleCallsInBlockInlinedThroughInvoke(BB, InvokeDest, +                                             InvokeDestPHIValues); + +    if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { +      // An UnwindInst requires special handling when it gets inlined into an +      // invoke site.  Once this happens, we know that the unwind would cause +      // a control transfer to the invoke exception destination, so we can +      // transform it into a direct branch to the exception destination. +      BranchInst::Create(InvokeDest, UI); + +      // Delete the unwind instruction! +      UI->eraseFromParent(); + +      // Update any PHI nodes in the exceptional block to indicate that +      // there is now a new entry in them. +      unsigned i = 0; +      for (BasicBlock::iterator I = InvokeDest->begin(); +           isa<PHINode>(I); ++I, ++i) { +        PHINode *PN = cast<PHINode>(I); +        PN->addIncoming(InvokeDestPHIValues[i], BB);        }      }    } @@ -185,17 +192,19 @@ static void UpdateCallGraphAfterInlining(CallSite CS,    }    for (; I != E; ++I) { -    const Instruction *OrigCall = I->first.getInstruction(); +    const Value *OrigCall = I->first;      DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);      // Only copy the edge if the call was inlined! 
-    if (VMI != ValueMap.end() && VMI->second) { -      // If the call was inlined, but then constant folded, there is no edge to -      // add.  Check for this case. -      if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second)) -        CallerNode->addCalledFunction(CallSite::get(NewCall), I->second); -    } +    if (VMI == ValueMap.end() || VMI->second == 0) +      continue; +     +    // If the call was inlined, but then constant folded, there is no edge to +    // add.  Check for this case. +    if (Instruction *NewCall = dyn_cast<Instruction>(VMI->second)) +      CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);    } +      // Update the call graph by deleting the edge from Callee to Caller.  We must    // do this after the loop above in case Caller and Callee are the same.    CallerNode->removeCallEdgeFor(CS); @@ -204,25 +213,27 @@ static void UpdateCallGraphAfterInlining(CallSite CS,  /// findFnRegionEndMarker - This is a utility routine that is used by  /// InlineFunction. Return llvm.dbg.region.end intrinsic that corresponds  /// to the llvm.dbg.func.start of the function F. Otherwise return NULL. 
+///  static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) { -  GlobalVariable *FnStart = NULL; +  MDNode *FnStart = NULL;    const DbgRegionEndInst *FnEnd = NULL;    for (Function::const_iterator FI = F->begin(), FE =F->end(); FI != FE; ++FI)       for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); BI != BE;           ++BI) {        if (FnStart == NULL)  {          if (const DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI)) { -          DISubprogram SP(cast<GlobalVariable>(FSI->getSubprogram())); +          DISubprogram SP(FSI->getSubprogram());            assert (SP.isNull() == false && "Invalid llvm.dbg.func.start");            if (SP.describes(F)) -            FnStart = SP.getGV(); +            FnStart = SP.getNode();          } -      } else { -        if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI)) -          if (REI->getContext() == FnStart) -            FnEnd = REI; +        continue;        } +       +      if (const DbgRegionEndInst *REI = dyn_cast<DbgRegionEndInst>(BI)) +        if (REI->getContext() == FnStart) +          FnEnd = REI;      }    return FnEnd;  } @@ -236,8 +247,10 @@ static const DbgRegionEndInst *findFnRegionEndMarker(const Function *F) {  // exists in the instruction stream.  Similiarly this will inline a recursive  // function by one level.  // -bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { +bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD, +                          SmallVectorImpl<AllocaInst*> *StaticAllocas) {    Instruction *TheCall = CS.getInstruction(); +  LLVMContext &Context = TheCall->getContext();    assert(TheCall->getParent() && TheCall->getParent()->getParent() &&           "Instruction not in function!"); @@ -277,7 +290,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {    // Make sure to capture all of the return instructions from the cloned    // function. 
-  std::vector<ReturnInst*> Returns; +  SmallVector<ReturnInst*, 8> Returns;    ClonedCodeInfo InlinedFunctionInfo;    Function::iterator FirstNewBlock; @@ -302,15 +315,17 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {        if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&            !CalledFunc->onlyReadsMemory()) {          const Type *AggTy = cast<PointerType>(I->getType())->getElementType(); -        const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty); +        const Type *VoidPtrTy =  +            Type::getInt8PtrTy(Context);          // Create the alloca.  If we have TargetData, use nice alignment.          unsigned Align = 1;          if (TD) Align = TD->getPrefTypeAlignment(AggTy); -        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), -                                          Caller->begin()->begin()); +        Value *NewAlloca = new AllocaInst(AggTy, 0, Align,  +                                          I->getName(),  +                                          &*Caller->begin()->begin());          // Emit a memcpy. -        const Type *Tys[] = { Type::Int64Ty }; +        const Type *Tys[] = { Type::getInt64Ty(Context) };          Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),                                                         Intrinsic::memcpy,                                                          Tys, 1); @@ -321,13 +336,15 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {          if (TD == 0)            Size = ConstantExpr::getSizeOf(AggTy);          else -          Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy)); +          Size = ConstantInt::get(Type::getInt64Ty(Context), +                                         TD->getTypeStoreSize(AggTy));          // Always generate a memcpy of alignment 1 here because we don't know          // the alignment of the src pointer.  
Other optimizations can infer          // better alignment.          Value *CallArgs[] = { -          DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1) +          DestCast, SrcCast, Size, +          ConstantInt::get(Type::getInt32Ty(Context), 1)          };          CallInst *TheMemCpy =            CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall); @@ -352,13 +369,12 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {      // call site. The function body cloner does not clone original      // region end marker from the CalledFunc. This will ensure that      // inlined function's scope ends at the right place.  -    const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc); -    if (DREI) { -      for (BasicBlock::iterator BI = TheCall,  -             BE = TheCall->getParent()->end(); BI != BE; ++BI) { +    if (const DbgRegionEndInst *DREI = findFnRegionEndMarker(CalledFunc)) { +      for (BasicBlock::iterator BI = TheCall, BE = TheCall->getParent()->end(); +           BI != BE; ++BI) {          if (DbgStopPointInst *DSPI = dyn_cast<DbgStopPointInst>(BI)) {            if (DbgRegionEndInst *NewDREI =  -              dyn_cast<DbgRegionEndInst>(DREI->clone())) +                dyn_cast<DbgRegionEndInst>(DREI->clone()))              NewDREI->insertAfter(DSPI);            break;          } @@ -388,31 +404,39 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {    {      BasicBlock::iterator InsertPoint = Caller->begin()->begin();      for (BasicBlock::iterator I = FirstNewBlock->begin(), -           E = FirstNewBlock->end(); I != E; ) -      if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) { -        // If the alloca is now dead, remove it.  This often occurs due to code -        // specialization. 
-        if (AI->use_empty()) { -          AI->eraseFromParent(); -          continue; -        } +         E = FirstNewBlock->end(); I != E; ) { +      AllocaInst *AI = dyn_cast<AllocaInst>(I++); +      if (AI == 0) continue; +       +      // If the alloca is now dead, remove it.  This often occurs due to code +      // specialization. +      if (AI->use_empty()) { +        AI->eraseFromParent(); +        continue; +      } -        if (isa<Constant>(AI->getArraySize())) { -          // Scan for the block of allocas that we can move over, and move them -          // all at once. -          while (isa<AllocaInst>(I) && -                 isa<Constant>(cast<AllocaInst>(I)->getArraySize())) -            ++I; - -          // Transfer all of the allocas over in a block.  Using splice means -          // that the instructions aren't removed from the symbol table, then -          // reinserted. -          Caller->getEntryBlock().getInstList().splice( -              InsertPoint, -              FirstNewBlock->getInstList(), -              AI, I); -        } +      if (!isa<Constant>(AI->getArraySize())) +        continue; +       +      // Keep track of the static allocas that we inline into the caller if the +      // StaticAllocas pointer is non-null. +      if (StaticAllocas) StaticAllocas->push_back(AI); +       +      // Scan for the block of allocas that we can move over, and move them +      // all at once. +      while (isa<AllocaInst>(I) && +             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { +        if (StaticAllocas) StaticAllocas->push_back(cast<AllocaInst>(I)); +        ++I;        } + +      // Transfer all of the allocas over in a block.  Using splice means +      // that the instructions aren't removed from the symbol table, then +      // reinserted. 
+      Caller->getEntryBlock().getInstList().splice(InsertPoint, +                                                   FirstNewBlock->getInstList(), +                                                   AI, I); +    }    }    // If the inlined code contained dynamic alloca instructions, wrap the inlined @@ -486,7 +510,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {           BB != E; ++BB) {        TerminatorInst *Term = BB->getTerminator();        if (isa<UnwindInst>(Term)) { -        new UnreachableInst(Term); +        new UnreachableInst(Context, Term);          BB->getInstList().erase(Term);        }      } @@ -495,7 +519,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {    // any inlined 'unwind' instructions into branches to the invoke exception    // destination, and call instructions into invoke instructions.    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) -    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo, CG); +    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);    // If we cloned in _exactly one_ basic block, and if that block ends in a    // return instruction, we splice the body of the inlined callee directly into diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index 4f8a1603948a..1fa51a3b6a71 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -32,7 +32,7 @@ namespace {      bool runOnFunction(Function &F) {        for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();             AI != AE; ++AI) -        if (!AI->hasName() && AI->getType() != Type::VoidTy) +        if (!AI->hasName() && AI->getType() != Type::getVoidTy(F.getContext()))            AI->setName("tmp");        for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { @@ -40,7 +40,7 @@ namespace {            BB->setName("BB");          for (BasicBlock::iterator I = 
BB->begin(), E = BB->end(); I != E; ++I) -          if (!I->hasName() && I->getType() != Type::VoidTy) +          if (!I->hasName() && I->getType() != Type::getVoidTy(F.getContext()))              I->setName("tmp");        }        return true; diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index d5e7303a5070..56e662e9dac1 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -33,22 +33,19 @@  #include "llvm/Pass.h"  #include "llvm/Function.h"  #include "llvm/Instructions.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/Dominators.h"  #include "llvm/Analysis/LoopPass.h"  #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h"  #include "llvm/Support/PredIteratorCache.h" -#include <algorithm> -#include <map>  using namespace llvm;  STATISTIC(NumLCSSA, "Number of live out of a loop variables");  namespace { -  struct VISIBILITY_HIDDEN LCSSA : public LoopPass { +  struct LCSSA : public LoopPass {      static char ID; // Pass identification, replacement for typeid      LCSSA() : LoopPass(&ID) {} @@ -57,12 +54,10 @@ namespace {      DominatorTree *DT;      std::vector<BasicBlock*> LoopBlocks;      PredIteratorCache PredCache; +    Loop *L;      virtual bool runOnLoop(Loop *L, LPPassManager &LPM); -    void ProcessInstruction(Instruction* Instr, -                            const SmallVector<BasicBlock*, 8>& exitBlocks); -          /// This transformation requires natural loop information & requires that      /// loop preheaders be inserted into the CFG.  It maintains both of these,      /// as well as the CFG.  It also requires dominator information. 
@@ -71,9 +66,9 @@ namespace {        AU.setPreservesCFG();        AU.addRequiredID(LoopSimplifyID);        AU.addPreservedID(LoopSimplifyID); -      AU.addRequired<LoopInfo>(); +      AU.addRequiredTransitive<LoopInfo>();        AU.addPreserved<LoopInfo>(); -      AU.addRequired<DominatorTree>(); +      AU.addRequiredTransitive<DominatorTree>();        AU.addPreserved<ScalarEvolution>();        AU.addPreserved<DominatorTree>(); @@ -85,15 +80,17 @@ namespace {        AU.addPreserved<DominanceFrontier>();      }    private: -    void getLoopValuesUsedOutsideLoop(Loop *L, -                                      SetVector<Instruction*> &AffectedValues, -                                 const SmallVector<BasicBlock*, 8>& exitBlocks); - -    Value *GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst, -                            DenseMap<DomTreeNode*, Value*> &Phis); +    bool ProcessInstruction(Instruction *Inst, +                            const SmallVectorImpl<BasicBlock*> &ExitBlocks); +     +    /// verifyAnalysis() - Verify loop nest. +    virtual void verifyAnalysis() const { +      // Check the special guarantees that LCSSA makes. +      assert(L->isLCSSAForm() && "LCSSA form not preserved!"); +    }      /// inLoop - returns true if the given block is within the current loop -    bool inLoop(BasicBlock* B) { +    bool inLoop(BasicBlock *B) const {        return std::binary_search(LoopBlocks.begin(), LoopBlocks.end(), B);      }    }; @@ -105,181 +102,163 @@ static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");  Pass *llvm::createLCSSAPass() { return new LCSSA(); }  const PassInfo *const llvm::LCSSAID = &X; + +/// BlockDominatesAnExit - Return true if the specified block dominates at least +/// one of the blocks in the specified list. 
+static bool BlockDominatesAnExit(BasicBlock *BB, +                                 const SmallVectorImpl<BasicBlock*> &ExitBlocks, +                                 DominatorTree *DT) { +  DomTreeNode *DomNode = DT->getNode(BB); +  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) +    if (DT->dominates(DomNode, DT->getNode(ExitBlocks[i]))) +      return true; + +  return false; +} + +  /// runOnFunction - Process all loops in the function, inner-most out. -bool LCSSA::runOnLoop(Loop *L, LPPassManager &LPM) { -  PredCache.clear(); +bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) { +  L = TheLoop;    LI = &LPM.getAnalysis<LoopInfo>();    DT = &getAnalysis<DominatorTree>(); -  // Speed up queries by creating a sorted list of blocks +  // Get the set of exiting blocks. +  SmallVector<BasicBlock*, 8> ExitBlocks; +  L->getExitBlocks(ExitBlocks); +   +  if (ExitBlocks.empty()) +    return false; +   +  // Speed up queries by creating a sorted vector of blocks.    LoopBlocks.clear();    LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); -  std::sort(LoopBlocks.begin(), LoopBlocks.end()); +  array_pod_sort(LoopBlocks.begin(), LoopBlocks.end()); -  SmallVector<BasicBlock*, 8> exitBlocks; -  L->getExitBlocks(exitBlocks); +  // Look at all the instructions in the loop, checking to see if they have uses +  // outside the loop.  If so, rewrite those uses. +  bool MadeChange = false; -  SetVector<Instruction*> AffectedValues; -  getLoopValuesUsedOutsideLoop(L, AffectedValues, exitBlocks); +  for (Loop::block_iterator BBI = L->block_begin(), E = L->block_end(); +       BBI != E; ++BBI) { +    BasicBlock *BB = *BBI; +     +    // For large loops, avoid use-scanning by using dominance information:  In +    // particular, if a block does not dominate any of the loop exits, then none +    // of the values defined in the block could be used outside the loop. 
+    if (!BlockDominatesAnExit(BB, ExitBlocks, DT)) +      continue; +     +    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); +         I != E; ++I) { +      // Reject two common cases fast: instructions with no uses (like stores) +      // and instructions with one use that is in the same block as this. +      if (I->use_empty() || +          (I->hasOneUse() && I->use_back()->getParent() == BB && +           !isa<PHINode>(I->use_back()))) +        continue; +       +      MadeChange |= ProcessInstruction(I, ExitBlocks); +    } +  } -  // If no values are affected, we can save a lot of work, since we know that -  // nothing will be changed. -  if (AffectedValues.empty()) -    return false; +  assert(L->isLCSSAForm()); +  PredCache.clear(); + +  return MadeChange; +} + +/// isExitBlock - Return true if the specified block is in the list. +static bool isExitBlock(BasicBlock *BB, +                        const SmallVectorImpl<BasicBlock*> &ExitBlocks) { +  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) +    if (ExitBlocks[i] == BB) +      return true; +  return false; +} + +/// ProcessInstruction - Given an instruction in the loop, check to see if it +/// has any uses that are outside the current loop.  If so, insert LCSSA PHI +/// nodes and rewrite the uses. 
+bool LCSSA::ProcessInstruction(Instruction *Inst, +                               const SmallVectorImpl<BasicBlock*> &ExitBlocks) { +  SmallVector<Use*, 16> UsesToRewrite; -  // Iterate over all affected values for this loop and insert Phi nodes -  // for them in the appropriate exit blocks +  BasicBlock *InstBB = Inst->getParent(); -  for (SetVector<Instruction*>::iterator I = AffectedValues.begin(), -       E = AffectedValues.end(); I != E; ++I) -    ProcessInstruction(*I, exitBlocks); +  for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); +       UI != E; ++UI) { +    BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); +    if (PHINode *PN = dyn_cast<PHINode>(*UI)) +      UserBB = PN->getIncomingBlock(UI); +     +    if (InstBB != UserBB && !inLoop(UserBB)) +      UsesToRewrite.push_back(&UI.getUse()); +  } -  assert(L->isLCSSAForm()); +  // If there are no uses outside the loop, exit with no change. +  if (UsesToRewrite.empty()) return false; -  return true; -} - -/// processInstruction - Given a live-out instruction, insert LCSSA Phi nodes, -/// eliminate all out-of-loop uses. -void LCSSA::ProcessInstruction(Instruction *Instr, -                               const SmallVector<BasicBlock*, 8>& exitBlocks) {    ++NumLCSSA; // We are applying the transformation -  // Keep track of the blocks that have the value available already. -  DenseMap<DomTreeNode*, Value*> Phis; - -  BasicBlock *DomBB = Instr->getParent(); -    // Invoke instructions are special in that their result value is not available    // along their unwind edge. The code below tests to see whether DomBB dominates    // the value, so adjust DomBB to the normal destination block, which is    // effectively where the value is first usable. 
-  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Instr)) +  BasicBlock *DomBB = Inst->getParent(); +  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Inst))      DomBB = Inv->getNormalDest();    DomTreeNode *DomNode = DT->getNode(DomBB); -  // Insert the LCSSA phi's into the exit blocks (dominated by the value), and -  // add them to the Phi's map. -  for (SmallVector<BasicBlock*, 8>::const_iterator BBI = exitBlocks.begin(), -      BBE = exitBlocks.end(); BBI != BBE; ++BBI) { -    BasicBlock *BB = *BBI; -    DomTreeNode *ExitBBNode = DT->getNode(BB); -    Value *&Phi = Phis[ExitBBNode]; -    if (!Phi && DT->dominates(DomNode, ExitBBNode)) { -      PHINode *PN = PHINode::Create(Instr->getType(), Instr->getName()+".lcssa", -                                    BB->begin()); -      PN->reserveOperandSpace(PredCache.GetNumPreds(BB)); - -      // Remember that this phi makes the value alive in this block. -      Phi = PN; - -      // Add inputs from inside the loop for this PHI. -      for (BasicBlock** PI = PredCache.GetPreds(BB); *PI; ++PI) -        PN->addIncoming(Instr, *PI); -    } -  } +  SSAUpdater SSAUpdate; +  SSAUpdate.Initialize(Inst); -   -  // Record all uses of Instr outside the loop.  We need to rewrite these.  The -  // LCSSA phis won't be included because they use the value in the loop. -  for (Value::use_iterator UI = Instr->use_begin(), E = Instr->use_end(); -       UI != E;) { -    BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); -    if (PHINode *P = dyn_cast<PHINode>(*UI)) { -      UserBB = P->getIncomingBlock(UI); -    } +  // Insert the LCSSA phi's into all of the exit blocks dominated by the +  // value., and add them to the Phi's map. +  for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(), +      BBE = ExitBlocks.end(); BBI != BBE; ++BBI) { +    BasicBlock *ExitBB = *BBI; +    if (!DT->dominates(DomNode, DT->getNode(ExitBB))) continue; -    // If the user is in the loop, don't rewrite it! 
-    if (UserBB == Instr->getParent() || inLoop(UserBB)) { -      ++UI; -      continue; -    } +    // If we already inserted something for this BB, don't reprocess it. +    if (SSAUpdate.HasValueForBlock(ExitBB)) continue; -    // Otherwise, patch up uses of the value with the appropriate LCSSA Phi, -    // inserting PHI nodes into join points where needed. -    Value *Val = GetValueForBlock(DT->getNode(UserBB), Instr, Phis); -     -    // Preincrement the iterator to avoid invalidating it when we change the -    // value. -    Use &U = UI.getUse(); -    ++UI; -    U.set(Val); -  } -} +    PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa", +                                  ExitBB->begin()); +    PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB)); -/// getLoopValuesUsedOutsideLoop - Return any values defined in the loop that -/// are used by instructions outside of it. -void LCSSA::getLoopValuesUsedOutsideLoop(Loop *L, -                                      SetVector<Instruction*> &AffectedValues, -                                const SmallVector<BasicBlock*, 8>& exitBlocks) { -  // FIXME: For large loops, we may be able to avoid a lot of use-scanning -  // by using dominance information.  In particular, if a block does not -  // dominate any of the loop exits, then none of the values defined in the -  // block could be used outside the loop. 
-  for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end(); -       BB != BE; ++BB) { -    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ++I) -      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; -           ++UI) { -        BasicBlock *UserBB = cast<Instruction>(*UI)->getParent(); -        if (PHINode* p = dyn_cast<PHINode>(*UI)) { -          UserBB = p->getIncomingBlock(UI); -        } -         -        if (*BB != UserBB && !inLoop(UserBB)) { -          AffectedValues.insert(I); -          break; -        } -      } +    // Add inputs from inside the loop for this PHI. +    for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) +      PN->addIncoming(Inst, *PI); +     +    // Remember that this phi makes the value alive in this block. +    SSAUpdate.AddAvailableValue(ExitBB, PN);    } -} - -/// GetValueForBlock - Get the value to use within the specified basic block. -/// available values are in Phis. -Value *LCSSA::GetValueForBlock(DomTreeNode *BB, Instruction *OrigInst, -                               DenseMap<DomTreeNode*, Value*> &Phis) { -  // If there is no dominator info for this BB, it is unreachable. -  if (BB == 0) -    return UndefValue::get(OrigInst->getType()); -                                  -  // If we have already computed this value, return the previously computed val. -  if (Phis.count(BB)) return Phis[BB]; - -  DomTreeNode *IDom = BB->getIDom(); +   +  // Rewrite all uses outside the loop in terms of the new PHIs we just +  // inserted. +  for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { +    // If this use is in an exit block, rewrite to use the newly inserted PHI. +    // This is required for correctness because SSAUpdate doesn't handle uses in +    // the same block.  It assumes the PHI we inserted is at the end of the +    // block. 
+    Instruction *User = cast<Instruction>(UsesToRewrite[i]->getUser()); +    BasicBlock *UserBB = User->getParent(); +    if (PHINode *PN = dyn_cast<PHINode>(User)) +      UserBB = PN->getIncomingBlock(*UsesToRewrite[i]); -  // Otherwise, there are two cases: we either have to insert a PHI node or we -  // don't.  We need to insert a PHI node if this block is not dominated by one -  // of the exit nodes from the loop (the loop could have multiple exits, and -  // though the value defined *inside* the loop dominated all its uses, each -  // exit by itself may not dominate all the uses). -  // -  // The simplest way to check for this condition is by checking to see if the -  // idom is in the loop.  If so, we *know* that none of the exit blocks -  // dominate this block.  Note that we *know* that the block defining the -  // original instruction is in the idom chain, because if it weren't, then the -  // original value didn't dominate this use. -  if (!inLoop(IDom->getBlock())) { -    // Idom is not in the loop, we must still be "below" the exit block and must -    // be fully dominated by the value live in the idom. -    Value* val = GetValueForBlock(IDom, OrigInst, Phis); -    Phis.insert(std::make_pair(BB, val)); -    return val; +    if (isa<PHINode>(UserBB->begin()) && +        isExitBlock(UserBB, ExitBlocks)) { +      UsesToRewrite[i]->set(UserBB->begin()); +      continue; +    } +     +    // Otherwise, do full PHI insertion. +    SSAUpdate.RewriteUse(*UsesToRewrite[i]);    } -  BasicBlock *BBN = BB->getBlock(); -   -  // Otherwise, the idom is the loop, so we need to insert a PHI node.  Do so -  // now, then get values to fill in the incoming values for the PHI. 
-  PHINode *PN = PHINode::Create(OrigInst->getType(), -                                OrigInst->getName() + ".lcssa", BBN->begin()); -  PN->reserveOperandSpace(PredCache.GetNumPreds(BBN)); -  Phis.insert(std::make_pair(BB, PN)); -                                  -  // Fill in the incoming values for the block. -  for (BasicBlock** PI = PredCache.GetPreds(BBN); *PI; ++PI) -    PN->addIncoming(GetValueForBlock(DT->getNode(*PI), OrigInst, Phis), *PI); -  return PN; +  return true;  } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 8c08638c4c3d..b62261119c75 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -20,9 +20,11 @@  #include "llvm/Instructions.h"  #include "llvm/Intrinsics.h"  #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h"  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/Analysis/ConstantFolding.h"  #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/ProfileInfo.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Support/GetElementPtrTypeIterator.h"  #include "llvm/Support/MathExtras.h" @@ -183,8 +185,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {      } else if (SI->getNumSuccessors() == 2) {        // Otherwise, we can fold this switch into a conditional branch        // instruction if it has only one non-default destination. -      Value *Cond = new ICmpInst(ICmpInst::ICMP_EQ, SI->getCondition(), -                                 SI->getSuccessorValue(1), "cond", SI); +      Value *Cond = new ICmpInst(SI, ICmpInst::ICMP_EQ, SI->getCondition(), +                                 SI->getSuccessorValue(1), "cond");        // Insert the new branch...        BranchInst::Create(SI->getSuccessor(1), SI->getSuccessor(0), Cond, SI); @@ -262,7 +264,6 @@ void llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) {  /// too, recursively.  
void  llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { -    // We can remove a PHI if it is on a cycle in the def-use graph    // where each node in the cycle has degree one, i.e. only one use,    // and is an instruction with no side effects. @@ -294,7 +295,7 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {  /// between them, moving the instructions in the predecessor into DestBB and  /// deleting the predecessor block.  /// -void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) { +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {    // If BB has single-entry PHI nodes, fold them.    while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {      Value *NewVal = PN->getIncomingValue(0); @@ -314,6 +315,13 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB) {    // Anything that branched to PredBB now branches to DestBB.    PredBB->replaceAllUsesWith(DestBB); +  if (P) { +    ProfileInfo *PI = P->getAnalysisIfAvailable<ProfileInfo>(); +    if (PI) { +      PI->replaceAllUses(PredBB, DestBB); +      PI->removeEdge(ProfileInfo::getEdge(PredBB, DestBB)); +    } +  }    // Nuke BB.    
PredBB->eraseFromParent();  } diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index d6b167f8b848..c22708a92b7a 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -37,10 +37,12 @@  #include "llvm/Constants.h"  #include "llvm/Instructions.h"  #include "llvm/Function.h" +#include "llvm/LLVMContext.h"  #include "llvm/Type.h"  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Local.h"  #include "llvm/Support/CFG.h" @@ -55,44 +57,42 @@ STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");  STATISTIC(NumNested  , "Number of nested loops split out");  namespace { -  struct VISIBILITY_HIDDEN LoopSimplify : public FunctionPass { +  struct VISIBILITY_HIDDEN LoopSimplify : public LoopPass {      static char ID; // Pass identification, replacement for typeid -    LoopSimplify() : FunctionPass(&ID) {} +    LoopSimplify() : LoopPass(&ID) {}      // AA - If we have an alias analysis object to update, this is it, otherwise      // this is null.      AliasAnalysis *AA;      LoopInfo *LI;      DominatorTree *DT; -    virtual bool runOnFunction(Function &F); +    Loop *L; +    virtual bool runOnLoop(Loop *L, LPPassManager &LPM);      virtual void getAnalysisUsage(AnalysisUsage &AU) const {        // We need loop information to identify the loops... 
-      AU.addRequired<LoopInfo>(); -      AU.addRequired<DominatorTree>(); +      AU.addRequiredTransitive<LoopInfo>(); +      AU.addRequiredTransitive<DominatorTree>();        AU.addPreserved<LoopInfo>();        AU.addPreserved<DominatorTree>();        AU.addPreserved<DominanceFrontier>();        AU.addPreserved<AliasAnalysis>(); +      AU.addPreserved<ScalarEvolution>();        AU.addPreservedID(BreakCriticalEdgesID);  // No critical edges added.      }      /// verifyAnalysis() - Verify loop nest.      void verifyAnalysis() const { -#ifndef NDEBUG -      LoopInfo *NLI = &getAnalysis<LoopInfo>(); -      for (LoopInfo::iterator I = NLI->begin(), E = NLI->end(); I != E; ++I)  -        (*I)->verifyLoop(); -#endif   +      assert(L->isLoopSimplifyForm() && "LoopSimplify form not preserved!");      }    private: -    bool ProcessLoop(Loop *L); +    bool ProcessLoop(Loop *L, LPPassManager &LPM);      BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); -    void InsertPreheaderForLoop(Loop *L); -    Loop *SeparateNestedLoop(Loop *L); -    void InsertUniqueBackedgeBlock(Loop *L); +    BasicBlock *InsertPreheaderForLoop(Loop *L); +    Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM); +    void InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);      void PlaceSplitBlockCarefully(BasicBlock *NewBB,                                    SmallVectorImpl<BasicBlock*> &SplitPreds,                                    Loop *L); @@ -105,73 +105,19 @@ X("loopsimplify", "Canonicalize natural loops", true);  // Publically exposed interface to pass...  const PassInfo *const llvm::LoopSimplifyID = &X; -FunctionPass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } +Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }  /// runOnFunction - Run down all loops in the CFG (recursively, but we could do  /// it in any convenient order) inserting preheaders...  
/// -bool LoopSimplify::runOnFunction(Function &F) { +bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) { +  L = l;    bool Changed = false;    LI = &getAnalysis<LoopInfo>();    AA = getAnalysisIfAvailable<AliasAnalysis>();    DT = &getAnalysis<DominatorTree>(); -  // Check to see that no blocks (other than the header) in loops have -  // predecessors that are not in loops.  This is not valid for natural loops, -  // but can occur if the blocks are unreachable.  Since they are unreachable we -  // can just shamelessly destroy their terminators to make them not branch into -  // the loop! -  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { -    // This case can only occur for unreachable blocks.  Blocks that are -    // unreachable can't be in loops, so filter those blocks out. -    if (LI->getLoopFor(BB)) continue; -     -    bool BlockUnreachable = false; -    TerminatorInst *TI = BB->getTerminator(); - -    // Check to see if any successors of this block are non-loop-header loops -    // that are not the header. -    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { -      // If this successor is not in a loop, BB is clearly ok. -      Loop *L = LI->getLoopFor(TI->getSuccessor(i)); -      if (!L) continue; -       -      // If the succ is the loop header, and if L is a top-level loop, then this -      // is an entrance into a loop through the header, which is also ok. -      if (L->getHeader() == TI->getSuccessor(i) && L->getParentLoop() == 0) -        continue; -       -      // Otherwise, this is an entrance into a loop from some place invalid. -      // Either the loop structure is invalid and this is not a natural loop (in -      // which case the compiler is buggy somewhere else) or BB is unreachable. -      BlockUnreachable = true; -      break; -    } -     -    // If this block is ok, check the next one. -    if (!BlockUnreachable) continue; -     -    // Otherwise, this block is dead.  
To clean up the CFG and to allow later -    // loop transformations to ignore this case, we delete the edges into the -    // loop by replacing the terminator. -     -    // Remove PHI entries from the successors. -    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) -      TI->getSuccessor(i)->removePredecessor(BB); -    -    // Add a new unreachable instruction before the old terminator. -    new UnreachableInst(TI); -     -    // Delete the dead terminator. -    if (AA) AA->deleteValue(TI); -    if (!TI->use_empty()) -      TI->replaceAllUsesWith(UndefValue::get(TI->getType())); -    TI->eraseFromParent(); -    Changed |= true; -  } -   -  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) -    Changed |= ProcessLoop(*I); +  Changed |= ProcessLoop(L, LPM);    return Changed;  } @@ -179,21 +125,42 @@ bool LoopSimplify::runOnFunction(Function &F) {  /// ProcessLoop - Walk the loop structure in depth first order, ensuring that  /// all loops have preheaders.  /// -bool LoopSimplify::ProcessLoop(Loop *L) { +bool LoopSimplify::ProcessLoop(Loop *L, LPPassManager &LPM) {    bool Changed = false;  ReprocessLoop: -   -  // Canonicalize inner loops before outer loops.  Inner loop canonicalization -  // can provide work for the outer loop to canonicalize. -  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) -    Changed |= ProcessLoop(*I); -   -  assert(L->getBlocks()[0] == L->getHeader() && -         "Header isn't first block in loop?"); + +  // Check to see that no blocks (other than the header) in this loop that has +  // predecessors that are not in the loop.  This is not valid for natural +  // loops, but can occur if the blocks are unreachable.  Since they are +  // unreachable we can just shamelessly delete those CFG edges! 
+  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); +       BB != E; ++BB) { +    if (*BB == L->getHeader()) continue; + +    SmallPtrSet<BasicBlock *, 4> BadPreds; +    for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI) +      if (!L->contains(*PI)) +        BadPreds.insert(*PI); + +    // Delete each unique out-of-loop (and thus dead) predecessor. +    for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(), +         E = BadPreds.end(); I != E; ++I) { +      // Inform each successor of each dead pred. +      for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI) +        (*SI)->removePredecessor(*I); +      // Zap the dead pred's terminator and replace it with unreachable. +      TerminatorInst *TI = (*I)->getTerminator(); +       TI->replaceAllUsesWith(UndefValue::get(TI->getType())); +      (*I)->getTerminator()->eraseFromParent(); +      new UnreachableInst((*I)->getContext(), *I); +      Changed = true; +    } +  }    // Does the loop already have a preheader?  If so, don't insert one. -  if (L->getLoopPreheader() == 0) { -    InsertPreheaderForLoop(L); +  BasicBlock *Preheader = L->getLoopPreheader(); +  if (!Preheader) { +    Preheader = InsertPreheaderForLoop(L);      NumInserted++;      Changed = true;    } @@ -229,10 +196,9 @@ ReprocessLoop:      // this for loops with a giant number of backedges, just factor them into a      // common backedge instead.      if (NumBackedges < 8) { -      if (Loop *NL = SeparateNestedLoop(L)) { +      if (SeparateNestedLoop(L, LPM)) {          ++NumNested;          // This is a big restructuring change, reprocess the whole loop. -        ProcessLoop(NL);          Changed = true;          // GCC doesn't tail recursion eliminate this.          
goto ReprocessLoop; @@ -242,7 +208,7 @@ ReprocessLoop:      // If we either couldn't, or didn't want to, identify nesting of the loops,      // insert a new block that all backedges target, then make it jump to the      // loop header. -    InsertUniqueBackedgeBlock(L); +    InsertUniqueBackedgeBlock(L, Preheader);      NumInserted++;      Changed = true;    } @@ -253,7 +219,7 @@ ReprocessLoop:    PHINode *PN;    for (BasicBlock::iterator I = L->getHeader()->begin();         (PN = dyn_cast<PHINode>(I++)); ) -    if (Value *V = PN->hasConstantValue()) { +    if (Value *V = PN->hasConstantValue(DT)) {        if (AA) AA->deleteValue(PN);        PN->replaceAllUsesWith(V);        PN->eraseFromParent(); @@ -286,19 +252,10 @@ ReprocessLoop:          Instruction *Inst = I++;          if (Inst == CI)            continue; -        if (Inst->isTrapping()) { +        if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator())) {            AllInvariant = false;            break;          } -        for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j) -          if (!L->isLoopInvariant(Inst->getOperand(j))) { -            AllInvariant = false; -            break; -          } -        if (!AllInvariant) -          break; -        // Hoist. 
-        Inst->moveBefore(L->getLoopPreheader()->getTerminator());        }        if (!AllInvariant) continue; @@ -317,9 +274,10 @@ ReprocessLoop:        DomTreeNode *Node = DT->getNode(ExitingBlock);        const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =          Node->getChildren(); -      for (unsigned k = 0, g = Children.size(); k != g; ++k) { -        DT->changeImmediateDominator(Children[k], Node->getIDom()); -        if (DF) DF->changeImmediateDominator(Children[k]->getBlock(), +      while (!Children.empty()) { +        DomTreeNode *Child = Children.front(); +        DT->changeImmediateDominator(Child, Node->getIDom()); +        if (DF) DF->changeImmediateDominator(Child->getBlock(),                                               Node->getIDom()->getBlock(),                                               DT);        } @@ -339,7 +297,7 @@ ReprocessLoop:  /// preheader, this method is called to insert one.  This method has two phases:  /// preheader insertion and analysis updating.  /// -void LoopSimplify::InsertPreheaderForLoop(Loop *L) { +BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {    BasicBlock *Header = L->getHeader();    // Compute the set of predecessors of the loop that are not in the loop. @@ -353,19 +311,12 @@ void LoopSimplify::InsertPreheaderForLoop(Loop *L) {    BasicBlock *NewBB =      SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),                             ".preheader", this); -   - -  //===--------------------------------------------------------------------===// -  //  Update analysis results now that we have performed the transformation -  // - -  // We know that we have loop information to update... update it now. -  if (Loop *Parent = L->getParentLoop()) -    Parent->addBasicBlockToLoop(NewBB, LI->getBase());    // Make sure that NewBB is put someplace intelligent, which doesn't mess up    // code layout too horribly.    
PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L); + +  return NewBB;  }  /// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit @@ -382,17 +333,6 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {                                               LoopBlocks.size(), ".loopexit",                                               this); -  // Update Loop Information - we know that the new block will be in whichever -  // loop the Exit block is in.  Note that it may not be in that immediate loop, -  // if the successor is some other loop header.  In that case, we continue  -  // walking up the loop tree to find a loop that contains both the successor -  // block and the predecessor block. -  Loop *SuccLoop = LI->getLoopFor(Exit); -  while (SuccLoop && !SuccLoop->contains(L->getHeader())) -    SuccLoop = SuccLoop->getParentLoop(); -  if (SuccLoop) -    SuccLoop->addBasicBlockToLoop(NewBB, LI->getBase()); -    return NewBB;  } @@ -422,14 +362,13 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,    for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {      PHINode *PN = cast<PHINode>(I);      ++I; -    if (Value *V = PN->hasConstantValue()) -      if (!isa<Instruction>(V) || DT->dominates(cast<Instruction>(V), PN)) { -        // This is a degenerate PHI already, don't modify it! -        PN->replaceAllUsesWith(V); -        if (AA) AA->deleteValue(PN); -        PN->eraseFromParent(); -        continue; -      } +    if (Value *V = PN->hasConstantValue(DT)) { +      // This is a degenerate PHI already, don't modify it! +      PN->replaceAllUsesWith(V); +      if (AA) AA->deleteValue(PN); +      PN->eraseFromParent(); +      continue; +    }      // Scan this PHI node looking for a use of the PHI node by itself.      
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) @@ -496,7 +435,7 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,  /// If we are able to separate out a loop, return the new outer loop that was  /// created.  /// -Loop *LoopSimplify::SeparateNestedLoop(Loop *L) { +Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {    PHINode *PN = FindPHIToPartitionLoops(L, DT, AA);    if (PN == 0) return 0;  // No known way to partition. @@ -527,17 +466,20 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {    else      LI->changeTopLevelLoop(L, NewOuter); -  // This block is going to be our new header block: add it to this loop and all -  // parent loops. -  NewOuter->addBasicBlockToLoop(NewBB, LI->getBase()); -    // L is now a subloop of our outer loop.    NewOuter->addChildLoop(L); +  // Add the new loop to the pass manager queue. +  LPM.insertLoopIntoQueue(NewOuter); +    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();         I != E; ++I)      NewOuter->addBlockEntry(*I); +  // Now reset the header in L, which had been moved by +  // SplitBlockPredecessors for the outer loop. +  L->moveToHeader(Header); +    // Determine which blocks should stay in L and which should be moved out to    // the Outer loop now.    std::set<BasicBlock*> BlocksInL; @@ -578,11 +520,10 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L) {  /// backedges to target a new basic block and have that block branch to the loop  /// header.  This ensures that loops have exactly one backedge.  
/// -void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) { +void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {    assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");    // Get information about the loop -  BasicBlock *Preheader = L->getLoopPreheader();    BasicBlock *Header = L->getHeader();    Function *F = Header->getParent(); @@ -592,7 +533,8 @@ void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L) {      if (*I != Preheader) BackedgeBlocks.push_back(*I);    // Create and insert the new backedge block... -  BasicBlock *BEBlock = BasicBlock::Create(Header->getName()+".backedge", F); +  BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), +                                           Header->getName()+".backedge", F);    BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);    // Move the new backedge block to right after the last backedge block. diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp index 74e7028d127c..f26d7c146ee3 100644 --- a/lib/Transforms/Utils/LowerAllocations.cpp +++ b/lib/Transforms/Utils/LowerAllocations.cpp @@ -19,6 +19,7 @@  #include "llvm/DerivedTypes.h"  #include "llvm/Instructions.h"  #include "llvm/Constants.h" +#include "llvm/LLVMContext.h"  #include "llvm/Pass.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Target/TargetData.h" @@ -28,17 +29,17 @@ using namespace llvm;  STATISTIC(NumLowered, "Number of allocations lowered");  namespace { -  /// LowerAllocations - Turn malloc and free instructions into %malloc and -  /// %free calls. +  /// LowerAllocations - Turn malloc and free instructions into @malloc and +  /// @free calls.    
///    class VISIBILITY_HIDDEN LowerAllocations : public BasicBlockPass { -    Constant *MallocFunc;   // Functions in the module we are processing -    Constant *FreeFunc;     // Initialized by doInitialization +    Constant *FreeFunc;   // Functions in the module we are processing +                          // Initialized by doInitialization      bool LowerMallocArgToInteger;    public:      static char ID; // Pass ID, replacement for typeid      explicit LowerAllocations(bool LowerToInt = false) -      : BasicBlockPass(&ID), MallocFunc(0), FreeFunc(0),  +      : BasicBlockPass(&ID), FreeFunc(0),           LowerMallocArgToInteger(LowerToInt) {}      virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -86,12 +87,9 @@ Pass *llvm::createLowerAllocationsPass(bool LowerMallocArgToInteger) {  // This function is always successful.  //  bool LowerAllocations::doInitialization(Module &M) { -  const Type *BPTy = PointerType::getUnqual(Type::Int8Ty); -  // Prototype malloc as "char* malloc(...)", because we don't know in -  // doInitialization whether size_t is int or long. 
-  FunctionType *FT = FunctionType::get(BPTy, true); -  MallocFunc = M.getOrInsertFunction("malloc", FT); -  FreeFunc = M.getOrInsertFunction("free"  , Type::VoidTy, BPTy, (Type *)0); +  const Type *BPTy = Type::getInt8PtrTy(M.getContext()); +  FreeFunc = M.getOrInsertFunction("free"  , Type::getVoidTy(M.getContext()), +                                   BPTy, (Type *)0);    return true;  } @@ -100,57 +98,22 @@ bool LowerAllocations::doInitialization(Module &M) {  //  bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {    bool Changed = false; -  assert(MallocFunc && FreeFunc && "Pass not initialized!"); +  assert(FreeFunc && "Pass not initialized!");    BasicBlock::InstListType &BBIL = BB.getInstList();    const TargetData &TD = getAnalysis<TargetData>(); -  const Type *IntPtrTy = TD.getIntPtrType(); +  const Type *IntPtrTy = TD.getIntPtrType(BB.getContext());    // Loop over all of the instructions, looking for malloc or free instructions    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) {      if (MallocInst *MI = dyn_cast<MallocInst>(I)) { -      const Type *AllocTy = MI->getType()->getElementType(); - -      // malloc(type) becomes i8 *malloc(size) -      Value *MallocArg; -      if (LowerMallocArgToInteger) -        MallocArg = ConstantInt::get(Type::Int64Ty, -                                     TD.getTypeAllocSize(AllocTy)); -      else -        MallocArg = ConstantExpr::getSizeOf(AllocTy); -      MallocArg = ConstantExpr::getTruncOrBitCast(cast<Constant>(MallocArg),  -                                                  IntPtrTy); - -      if (MI->isArrayAllocation()) { -        if (isa<ConstantInt>(MallocArg) && -            cast<ConstantInt>(MallocArg)->isOne()) { -          MallocArg = MI->getOperand(0);         // Operand * 1 = Operand -        } else if (Constant *CO = dyn_cast<Constant>(MI->getOperand(0))) { -          CO = ConstantExpr::getIntegerCast(CO, IntPtrTy, false /*ZExt*/); -          MallocArg = 
ConstantExpr::getMul(CO, cast<Constant>(MallocArg)); -        } else { -          Value *Scale = MI->getOperand(0); -          if (Scale->getType() != IntPtrTy) -            Scale = CastInst::CreateIntegerCast(Scale, IntPtrTy, false /*ZExt*/, -                                                "", I); - -          // Multiply it by the array size if necessary... -          MallocArg = BinaryOperator::Create(Instruction::Mul, Scale, -                                             MallocArg, "", I); -        } -      } - -      // Create the call to Malloc. -      CallInst *MCall = CallInst::Create(MallocFunc, MallocArg, "", I); -      MCall->setTailCall(); - -      // Create a cast instruction to convert to the right type... -      Value *MCast; -      if (MCall->getType() != Type::VoidTy) -        MCast = new BitCastInst(MCall, MI->getType(), "", I); -      else -        MCast = Constant::getNullValue(MI->getType()); +      Value *ArraySize = MI->getOperand(0); +      if (ArraySize->getType() != IntPtrTy) +        ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, +                                                false /*ZExt*/, "", I); +      Value *MCast = CallInst::CreateMalloc(I, IntPtrTy, +                                            MI->getAllocatedType(), ArraySize);        // Replace all uses of the old malloc inst with the cast inst        MI->replaceAllUsesWith(MCast); @@ -160,7 +123,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {      } else if (FreeInst *FI = dyn_cast<FreeInst>(I)) {        Value *PtrCast =           new BitCastInst(FI->getOperand(0), -                        PointerType::getUnqual(Type::Int8Ty), "", I); +               Type::getInt8PtrTy(BB.getContext()), "", I);        // Insert a call to the free function...        
CallInst::Create(FreeFunc, PtrCast, "", I)->setTailCall(); diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 1f6b1a2a6846..9a3de2649244 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -40,6 +40,7 @@  #include "llvm/DerivedTypes.h"  #include "llvm/Instructions.h"  #include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h"  #include "llvm/Module.h"  #include "llvm/Pass.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -114,7 +115,8 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {  // doInitialization - Make sure that there is a prototype for abort in the  // current module.  bool LowerInvoke::doInitialization(Module &M) { -  const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty); +  const Type *VoidPtrTy = +          Type::getInt8PtrTy(M.getContext());    AbortMessage = 0;    if (ExpensiveEHSupport) {      // Insert a type for the linked list of jump buffers. @@ -125,9 +127,9 @@ bool LowerInvoke::doInitialization(Module &M) {      { // The type is recursive, so use a type holder.        std::vector<const Type*> Elements;        Elements.push_back(JmpBufTy); -      OpaqueType *OT = OpaqueType::get(); +      OpaqueType *OT = OpaqueType::get(M.getContext());        Elements.push_back(PointerType::getUnqual(OT)); -      PATypeHolder JBLType(StructType::get(Elements)); +      PATypeHolder JBLType(StructType::get(M.getContext(), Elements));        OT->refineAbstractTypeTo(JBLType.get());  // Complete the cycle.        JBLinkTy = JBLType.get();        M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy); @@ -138,10 +140,10 @@ bool LowerInvoke::doInitialization(Module &M) {      // Now that we've done that, insert the jmpbuf list head global, unless it      // already exists.      
if (!(JBListHead = M.getGlobalVariable("llvm.sjljeh.jblist", PtrJBList))) { -      JBListHead = new GlobalVariable(PtrJBList, false, +      JBListHead = new GlobalVariable(M, PtrJBList, false,                                        GlobalValue::LinkOnceAnyLinkage,                                        Constant::getNullValue(PtrJBList), -                                      "llvm.sjljeh.jblist", &M); +                                      "llvm.sjljeh.jblist");      }  // VisualStudio defines setjmp as _setjmp via #include <csetjmp> / <setjmp.h>, @@ -163,7 +165,8 @@ bool LowerInvoke::doInitialization(Module &M) {    }    // We need the 'write' and 'abort' functions for both models. -  AbortFn = M.getOrInsertFunction("abort", Type::VoidTy, (Type *)0); +  AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()), +                                  (Type *)0);  #if 0 // "write" is Unix-specific.. code is going away soon anyway.    WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty,                                    VoidPtrTy, Type::Int32Ty, (Type *)0); @@ -178,26 +181,30 @@ void LowerInvoke::createAbortMessage(Module *M) {      // The abort message for expensive EH support tells the user that the      // program 'unwound' without an 'invoke' instruction.      
Constant *Msg = -      ConstantArray::get("ERROR: Exception thrown, but not caught!\n"); +      ConstantArray::get(M->getContext(), +                         "ERROR: Exception thrown, but not caught!\n");      AbortMessageLength = Msg->getNumOperands()-1;  // don't include \0 -    GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true, +    GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true,                                                 GlobalValue::InternalLinkage, -                                               Msg, "abortmsg", M); -    std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty)); +                                               Msg, "abortmsg"); +    std::vector<Constant*> GEPIdx(2, +                     Constant::getNullValue(Type::getInt32Ty(M->getContext())));      AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);    } else {      // The abort message for cheap EH support tells the user that EH is not      // enabled.      Constant *Msg = -      ConstantArray::get("Exception handler needed, but not enabled.  Recompile" -                         " program with -enable-correct-eh-support.\n"); +      ConstantArray::get(M->getContext(),  +                        "Exception handler needed, but not enabled."       
+                        "Recompile program with -enable-correct-eh-support.\n");      AbortMessageLength = Msg->getNumOperands()-1;  // don't include \0 -    GlobalVariable *MsgGV = new GlobalVariable(Msg->getType(), true, +    GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true,                                                 GlobalValue::InternalLinkage, -                                               Msg, "abortmsg", M); -    std::vector<Constant*> GEPIdx(2, Constant::getNullValue(Type::Int32Ty)); +                                               Msg, "abortmsg"); +    std::vector<Constant*> GEPIdx(2, Constant::getNullValue( +                                            Type::getInt32Ty(M->getContext())));      AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);    }  } @@ -249,8 +256,9 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {        // Insert a return instruction.  This really should be a "barrier", as it        // is unreachable. -      ReturnInst::Create(F.getReturnType() == Type::VoidTy ? 0 : -                         Constant::getNullValue(F.getReturnType()), UI); +      ReturnInst::Create(F.getContext(), +                         F.getReturnType() == Type::getVoidTy(F.getContext()) ? +                          0 : Constant::getNullValue(F.getReturnType()), UI);        // Remove the unwind instruction now.        BB->getInstList().erase(UI); @@ -265,7 +273,8 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {  void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,                                           AllocaInst *InvokeNum,                                           SwitchInst *CatchSwitch) { -  ConstantInt *InvokeNoC = ConstantInt::get(Type::Int32Ty, InvokeNo); +  ConstantInt *InvokeNoC = ConstantInt::get(Type::getInt32Ty(II->getContext()), +                                            InvokeNo);    // If the unwind edge has phi nodes, split the edge.    
if (isa<PHINode>(II->getUnwindDest()->begin())) { @@ -284,7 +293,8 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,    BasicBlock::iterator NI = II->getNormalDest()->getFirstNonPHI();    // nonvolatile. -  new StoreInst(Constant::getNullValue(Type::Int32Ty), InvokeNum, false, NI); +  new StoreInst(Constant::getNullValue(Type::getInt32Ty(II->getContext())),  +                InvokeNum, false, NI);    // Add a switch case to our unwind block.    CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); @@ -469,13 +479,15 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {      // alloca because the value needs to be live across invokes.      unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;      AllocaInst *JmpBuf = -      new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin()); +      new AllocaInst(JBLinkTy, 0, Align, +                     "jblink", F.begin()->begin());      std::vector<Value*> Idx; -    Idx.push_back(Constant::getNullValue(Type::Int32Ty)); -    Idx.push_back(ConstantInt::get(Type::Int32Ty, 1)); +    Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); +    Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1));      OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), -                                             "OldBuf", EntryBB->getTerminator()); +                                             "OldBuf", +                                              EntryBB->getTerminator());      // Copy the JBListHead to the alloca.      Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true, @@ -487,20 +499,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {      // Create the catch block.  The catch block is basically a big switch      // statement that goes to all of the invoke catch blocks. 
-    BasicBlock *CatchBB = BasicBlock::Create("setjmp.catch", &F); +    BasicBlock *CatchBB = +            BasicBlock::Create(F.getContext(), "setjmp.catch", &F);      // Create an alloca which keeps track of which invoke is currently      // executing.  For normal calls it contains zero. -    AllocaInst *InvokeNum = new AllocaInst(Type::Int32Ty, 0, "invokenum", -                                           EntryBB->begin()); -    new StoreInst(ConstantInt::get(Type::Int32Ty, 0), InvokeNum, true, -                  EntryBB->getTerminator()); +    AllocaInst *InvokeNum = new AllocaInst(Type::getInt32Ty(F.getContext()), 0, +                                           "invokenum",EntryBB->begin()); +    new StoreInst(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0),  +                  InvokeNum, true, EntryBB->getTerminator());      // Insert a load in the Catch block, and a switch on its value.  By default,      // we go to a block that just does an unwind (which is the correct action      // for a standard call). 
-    BasicBlock *UnwindBB = BasicBlock::Create("unwindbb", &F); -    Unwinds.push_back(new UnwindInst(UnwindBB)); +    BasicBlock *UnwindBB = BasicBlock::Create(F.getContext(), "unwindbb", &F); +    Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBB));      Value *CatchLoad = new LoadInst(InvokeNum, "invoke.num", true, CatchBB);      SwitchInst *CatchSwitch = @@ -512,19 +525,21 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {      BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),                                                       "setjmp.cont"); -    Idx[1] = ConstantInt::get(Type::Int32Ty, 0); +    Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);      Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),                                                   "TheJmpBuf",                                                   EntryBB->getTerminator()); -    JmpBufPtr = new BitCastInst(JmpBufPtr, PointerType::getUnqual(Type::Int8Ty), +    JmpBufPtr = new BitCastInst(JmpBufPtr, +                        Type::getInt8PtrTy(F.getContext()),                                  "tmp", EntryBB->getTerminator());      Value *SJRet = CallInst::Create(SetJmpFn, JmpBufPtr, "sjret",                                      EntryBB->getTerminator());      // Compare the return value to zero. -    Value *IsNormal = new ICmpInst(ICmpInst::ICMP_EQ, SJRet, +    Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), +                                   ICmpInst::ICMP_EQ, SJRet,                                     Constant::getNullValue(SJRet->getType()), -      "notunwind", EntryBB->getTerminator()); +                                   "notunwind");      // Nuke the uncond branch.      
EntryBB->getTerminator()->eraseFromParent(); @@ -541,9 +556,10 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {    // Create three new blocks, the block to load the jmpbuf ptr and compare    // against null, the block to do the longjmp, and the error block for if it    // is null.  Add them at the end of the function because they are not hot. -  BasicBlock *UnwindHandler = BasicBlock::Create("dounwind", &F); -  BasicBlock *UnwindBlock = BasicBlock::Create("unwind", &F); -  BasicBlock *TermBlock = BasicBlock::Create("unwinderror", &F); +  BasicBlock *UnwindHandler = BasicBlock::Create(F.getContext(), +                                                "dounwind", &F); +  BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwind", &F); +  BasicBlock *TermBlock = BasicBlock::Create(F.getContext(), "unwinderror", &F);    // If this function contains an invoke, restore the old jumpbuf ptr.    Value *BufPtr; @@ -556,26 +572,27 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {    }    // Load the JBList, if it's null, then there was no catch! -  Value *NotNull = new ICmpInst(ICmpInst::ICMP_NE, BufPtr, +  Value *NotNull = new ICmpInst(*UnwindHandler, ICmpInst::ICMP_NE, BufPtr,                                  Constant::getNullValue(BufPtr->getType()), -    "notnull", UnwindHandler); +                                "notnull");    BranchInst::Create(UnwindBlock, TermBlock, NotNull, UnwindHandler);    // Create the block to do the longjmp.    // Get a pointer to the jmpbuf and longjmp.    
std::vector<Value*> Idx; -  Idx.push_back(Constant::getNullValue(Type::Int32Ty)); -  Idx.push_back(ConstantInt::get(Type::Int32Ty, 0)); +  Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); +  Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0));    Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf",                                       UnwindBlock); -  Idx[0] = new BitCastInst(Idx[0], PointerType::getUnqual(Type::Int8Ty), +  Idx[0] = new BitCastInst(Idx[0], +             Type::getInt8PtrTy(F.getContext()),                             "tmp", UnwindBlock); -  Idx[1] = ConstantInt::get(Type::Int32Ty, 1); +  Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);    CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock); -  new UnreachableInst(UnwindBlock); +  new UnreachableInst(F.getContext(), UnwindBlock);    // Set up the term block ("throw without a catch"). -  new UnreachableInst(TermBlock); +  new UnreachableInst(F.getContext(), TermBlock);    // Insert a new call to write(2, AbortMessage, AbortMessageLength);    writeAbortMessage(TermBlock->getTerminator()); diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 1da59360fc2b..764f0980cd2d 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -18,6 +18,7 @@  #include "llvm/Constants.h"  #include "llvm/Function.h"  #include "llvm/Instructions.h" +#include "llvm/LLVMContext.h"  #include "llvm/Pass.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/Support/Debug.h" @@ -108,8 +109,10 @@ bool LowerSwitch::runOnFunction(Function &F) {  // operator<< - Used for debugging purposes.  
// -static std::ostream& operator<<(std::ostream &O, -                                const LowerSwitch::CaseVector &C) { +static raw_ostream& operator<<(raw_ostream &O, +                               const LowerSwitch::CaseVector &C) ATTRIBUTE_USED; +static raw_ostream& operator<<(raw_ostream &O, +                               const LowerSwitch::CaseVector &C) {    O << "[";    for (LowerSwitch::CaseVector::const_iterator B = C.begin(), @@ -121,11 +124,6 @@ static std::ostream& operator<<(std::ostream &O,    return O << "]";  } -static OStream& operator<<(OStream &O, const LowerSwitch::CaseVector &C) { -  if (O.stream()) *O.stream() << C; -  return O; -} -  // switchConvert - Convert the switch statement into a binary lookup of  // the case values. The function recursively builds this tree.  // @@ -140,9 +138,9 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,    unsigned Mid = Size / 2;    std::vector<CaseRange> LHS(Begin, Begin + Mid); -  DOUT << "LHS: " << LHS << "\n"; +  DEBUG(errs() << "LHS: " << LHS << "\n");    std::vector<CaseRange> RHS(Begin + Mid, End); -  DOUT << "RHS: " << RHS << "\n"; +  DEBUG(errs() << "RHS: " << RHS << "\n");    CaseRange& Pivot = *(Begin + Mid);    DEBUG(errs() << "Pivot ==> "  @@ -157,11 +155,12 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,    // Create a new node that checks if the value is < pivot. Go to the    // left branch if it is and right branch if not.    
Function* F = OrigBlock->getParent(); -  BasicBlock* NewNode = BasicBlock::Create("NodeBlock"); +  BasicBlock* NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock");    Function::iterator FI = OrigBlock;    F->getBasicBlockList().insert(++FI, NewNode); -  ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, Val, Pivot.Low, "Pivot"); +  ICmpInst* Comp = new ICmpInst(ICmpInst::ICMP_SLT, +                                Val, Pivot.Low, "Pivot");    NewNode->getInstList().push_back(Comp);    BranchInst::Create(LBranch, RBranch, Comp, NewNode);    return NewNode; @@ -178,7 +177,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,                                        BasicBlock* Default)  {    Function* F = OrigBlock->getParent(); -  BasicBlock* NewLeaf = BasicBlock::Create("LeafBlock"); +  BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");    Function::iterator FI = OrigBlock;    F->getBasicBlockList().insert(++FI, NewLeaf); @@ -186,18 +185,18 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,    ICmpInst* Comp = NULL;    if (Leaf.Low == Leaf.High) {      // Make the seteq instruction... 
-    Comp = new ICmpInst(ICmpInst::ICMP_EQ, Val, Leaf.Low, -                        "SwitchLeaf", NewLeaf); +    Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, +                        Leaf.Low, "SwitchLeaf");    } else {      // Make range comparison      if (cast<ConstantInt>(Leaf.Low)->isMinValue(true /*isSigned*/)) {        // Val >= Min && Val <= Hi --> Val <= Hi -      Comp = new ICmpInst(ICmpInst::ICMP_SLE, Val, Leaf.High, -                          "SwitchLeaf", NewLeaf); +      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_SLE, Val, Leaf.High, +                          "SwitchLeaf");      } else if (cast<ConstantInt>(Leaf.Low)->isZero()) {        // Val >= 0 && Val <= Hi --> Val <=u Hi -      Comp = new ICmpInst(ICmpInst::ICMP_ULE, Val, Leaf.High, -                          "SwitchLeaf", NewLeaf);       +      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, +                          "SwitchLeaf");            } else {        // Emit V-Lo <=u Hi-Lo        Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); @@ -205,8 +204,8 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,                                                     Val->getName()+".off",                                                     NewLeaf);        Constant *UpperBound = ConstantExpr::getAdd(NegLo, Leaf.High); -      Comp = new ICmpInst(ICmpInst::ICMP_ULE, Add, UpperBound, -                          "SwitchLeaf", NewLeaf); +      Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Add, UpperBound, +                          "SwitchLeaf");      }    } @@ -290,7 +289,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {    // Create a new, empty default block so that the new hierarchy of    // if-then statements go to this and the PHI nodes are happy. 
-  BasicBlock* NewDefault = BasicBlock::Create("NewDefault"); +  BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");    F->getBasicBlockList().insert(Default, NewDefault);    BranchInst::Create(Default, NewDefault); @@ -308,9 +307,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {    CaseVector Cases;    unsigned numCmps = Clusterify(Cases, SI); -  DOUT << "Clusterify finished. Total clusters: " << Cases.size() -       << ". Total compares: " << numCmps << "\n"; -  DOUT << "Cases: " << Cases << "\n"; +  DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size() +               << ". Total compares: " << numCmps << "\n"); +  DEBUG(errs() << "Cases: " << Cases << "\n"); +  (void)numCmps;    BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,                                            OrigBlock, NewDefault); diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index 2b06d778e145..5df08326d8bb 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -75,7 +75,7 @@ bool PromotePass::runOnFunction(Function &F) {      if (Allocas.empty()) break; -    PromoteMemToReg(Allocas, DT, DF); +    PromoteMemToReg(Allocas, DT, DF, F.getContext());      NumPromoted += Allocas.size();      Changed = true;    } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index b717699b7e05..9ca06bd180a1 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -23,13 +23,13 @@  #include "llvm/Function.h"  #include "llvm/Instructions.h"  #include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h"  #include "llvm/Analysis/Dominators.h"  #include "llvm/Analysis/AliasSetTracker.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h"  
#include "llvm/ADT/STLExtras.h"  #include "llvm/Support/CFG.h"  #include "llvm/Support/Compiler.h" @@ -41,7 +41,6 @@ STATISTIC(NumSingleStore,   "Number of alloca's promoted with a single store");  STATISTIC(NumDeadAlloca,    "Number of dead alloca's removed");  STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted"); -// Provide DenseMapInfo for all pointers.  namespace llvm {  template<>  struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { @@ -181,6 +180,8 @@ namespace {      /// AST - An AliasSetTracker object to update.  If null, don't update it.      ///      AliasSetTracker *AST; +     +    LLVMContext &Context;      /// AllocaLookup - Reverse mapping of Allocas.      /// @@ -212,8 +213,9 @@ namespace {      DenseMap<const BasicBlock*, unsigned> BBNumPreds;    public:      PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, -                   DominanceFrontier &df, AliasSetTracker *ast) -      : Allocas(A), DT(dt), DF(df), AST(ast) {} +                   DominanceFrontier &df, AliasSetTracker *ast, +                   LLVMContext &C) +      : Allocas(A), DT(dt), DF(df), AST(ast), Context(C) {}      void run(); @@ -291,10 +293,9 @@ namespace {        // As we scan the uses of the alloca instruction, keep track of stores,        // and decide whether all of the loads and stores to the alloca are within        // the same basic block. -      for (Value::use_iterator U = AI->use_begin(), E = AI->use_end(); -           U != E;)  { -        Instruction *User = cast<Instruction>(*U); -        ++U; +      for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); +           UI != E;)  { +        Instruction *User = cast<Instruction>(*UI++);          if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {            // Remove any uses of this alloca in DbgInfoInstrinsics.            
assert(BC->hasOneUse() && "Unexpected alloca uses!"); @@ -303,7 +304,8 @@ namespace {            BC->eraseFromParent();            continue;          }  -        else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { +         +        if (StoreInst *SI = dyn_cast<StoreInst>(User)) {            // Remember the basic blocks which define new values for the alloca            DefiningBlocks.push_back(SI->getParent());            AllocaPointerVal = SI->getOperand(0); @@ -491,17 +493,14 @@ void PromoteMem2Reg::run() {        PHINode *PN = I->second;        // If this PHI node merges one value and/or undefs, get the value. -      if (Value *V = PN->hasConstantValue(true)) { -        if (!isa<Instruction>(V) || -            properlyDominates(cast<Instruction>(V), PN)) { -          if (AST && isa<PointerType>(PN->getType())) -            AST->deleteValue(PN); -          PN->replaceAllUsesWith(V); -          PN->eraseFromParent(); -          NewPhiNodes.erase(I++); -          EliminatedAPHI = true; -          continue; -        } +      if (Value *V = PN->hasConstantValue(&DT)) { +        if (AST && isa<PointerType>(PN->getType())) +          AST->deleteValue(PN); +        PN->replaceAllUsesWith(V); +        PN->eraseFromParent(); +        NewPhiNodes.erase(I++); +        EliminatedAPHI = true; +        continue;        }        ++I;      } @@ -603,7 +602,9 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,          LiveInBlockWorklist.pop_back();          --i, --e;          break; -      } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { +      } +       +      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {          if (LI->getOperand(0) != AI) continue;          // Okay, we found a load before a store to the alloca.  It is actually @@ -757,6 +758,7 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,    }  } +namespace {  /// StoreIndexSearchPredicate - This is a helper predicate used to search by the  /// first element of a pair. 
@@ -767,6 +769,8 @@ struct StoreIndexSearchPredicate {    }  }; +} +  /// PromoteSingleBlockAlloca - Many allocas are only used within a single basic  /// block.  If this is the case, avoid traversing the CFG and inserting a lot of  /// potentially useless PHI nodes by just performing a single linear pass over @@ -864,8 +868,8 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,    // Create a PhiNode using the dereferenced type... and add the phi-node to the    // BasicBlock.    PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), -                       Allocas[AllocaNo]->getName() + "." + -                       utostr(Version++), BB->begin()); +                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),  +                       BB->begin());    ++NumPHIInsert;    PhiToAllocaMap[PN] = AllocaNo;    PN->reserveOperandSpace(getNumPreds(BB)); @@ -995,9 +999,9 @@ NextIteration:  ///  void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,                             DominatorTree &DT, DominanceFrontier &DF, -                           AliasSetTracker *AST) { +                           LLVMContext &Context, AliasSetTracker *AST) {    // If there is nothing to do, bail out...    if (Allocas.empty()) return; -  PromoteMem2Reg(Allocas, DT, DF, AST).run(); +  PromoteMem2Reg(Allocas, DT, DF, AST, Context).run();  } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp new file mode 100644 index 000000000000..780ee2638942 --- /dev/null +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -0,0 +1,335 @@ +//===- SSAUpdater.cpp - Unstructured SSA Update Tool ----------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the SSAUpdater class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Instructions.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy; +typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > > +                IncomingPredInfoTy; + +static AvailableValsTy &getAvailableVals(void *AV) { +  return *static_cast<AvailableValsTy*>(AV); +} + +static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) { +  return *static_cast<IncomingPredInfoTy*>(IPI); +} + + +SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI) +  : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {} + +SSAUpdater::~SSAUpdater() { +  delete &getAvailableVals(AV); +  delete &getIncomingPredInfo(IPI); +} + +/// Initialize - Reset this object to get ready for a new set of SSA +/// updates.  ProtoValue is the value used to name PHI nodes. +void SSAUpdater::Initialize(Value *ProtoValue) { +  if (AV == 0) +    AV = new AvailableValsTy(); +  else +    getAvailableVals(AV).clear(); +   +  if (IPI == 0) +    IPI = new IncomingPredInfoTy(); +  else +    getIncomingPredInfo(IPI).clear(); +  PrototypeValue = ProtoValue; +} + +/// HasValueForBlock - Return true if the SSAUpdater already has a value for +/// the specified block. +bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { +  return getAvailableVals(AV).count(BB); +} + +/// AddAvailableValue - Indicate that a rewritten value is available in the +/// specified block with the specified value. 
+void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { +  assert(PrototypeValue != 0 && "Need to initialize SSAUpdater"); +  assert(PrototypeValue->getType() == V->getType() && +         "All rewritten values must have the same type"); +  getAvailableVals(AV)[BB] = V; +} + +/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is +/// live at the end of the specified block. +Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) { +  assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State"); +  Value *Res = GetValueAtEndOfBlockInternal(BB); +  assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State"); +  return Res; +} + +/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that +/// is live in the middle of the specified block. +/// +/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one +/// important case: if there is a definition of the rewritten value after the +/// 'use' in BB.  Consider code like this: +/// +///      X1 = ... +///   SomeBB: +///      use(X) +///      X2 = ... +///      br Cond, SomeBB, OutBB +/// +/// In this case, there are two values (X1 and X2) added to the AvailableVals +/// set by the client of the rewriter, and those values are both live out of +/// their respective blocks.  However, the use of X happens in the *middle* of +/// a block.  Because of this, we need to insert a new PHI node in SomeBB to +/// merge the appropriate values, and this value isn't live out of the block. +/// +Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { +  // If there is no definition of the renamed variable in this block, just use +  // GetValueAtEndOfBlock to do our work. +  if (!getAvailableVals(AV).count(BB)) +    return GetValueAtEndOfBlock(BB); +   +  // Otherwise, we have the hard case.  Get the live-in values for each +  // predecessor. 
+  SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues; +  Value *SingularValue = 0; +   +  // We can get our predecessor info by walking the pred_iterator list, but it +  // is relatively slow.  If we already have PHI nodes in this block, walk one +  // of them to get the predecessor list instead. +  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { +    for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { +      BasicBlock *PredBB = SomePhi->getIncomingBlock(i); +      Value *PredVal = GetValueAtEndOfBlock(PredBB); +      PredValues.push_back(std::make_pair(PredBB, PredVal)); +       +      // Compute SingularValue. +      if (i == 0) +        SingularValue = PredVal; +      else if (PredVal != SingularValue) +        SingularValue = 0; +    } +  } else { +    bool isFirstPred = true; +    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { +      BasicBlock *PredBB = *PI; +      Value *PredVal = GetValueAtEndOfBlock(PredBB); +      PredValues.push_back(std::make_pair(PredBB, PredVal)); +       +      // Compute SingularValue. +      if (isFirstPred) { +        SingularValue = PredVal; +        isFirstPred = false; +      } else if (PredVal != SingularValue) +        SingularValue = 0; +    } +  } +   +  // If there are no predecessors, just return undef. +  if (PredValues.empty()) +    return UndefValue::get(PrototypeValue->getType()); +   +  // Otherwise, if all the merged values are the same, just use it. +  if (SingularValue != 0) +    return SingularValue; +   +  // Otherwise, we do need a PHI: insert one now. +  PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(), +                                         PrototypeValue->getName(), +                                         &BB->front()); +  InsertedPHI->reserveOperandSpace(PredValues.size()); +   +  // Fill in all the predecessors of the PHI. 
+  for (unsigned i = 0, e = PredValues.size(); i != e; ++i) +    InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first); +   +  // See if the PHI node can be merged to a single value.  This can happen in +  // loop cases when we get a PHI of itself and one other value. +  if (Value *ConstVal = InsertedPHI->hasConstantValue()) { +    InsertedPHI->eraseFromParent(); +    return ConstVal; +  } + +  // If the client wants to know about all new instructions, tell it. +  if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); +   +  DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n"); +  return InsertedPHI; +} + +/// RewriteUse - Rewrite a use of the symbolic value.  This handles PHI nodes, +/// which use their value in the corresponding predecessor. +void SSAUpdater::RewriteUse(Use &U) { +  Instruction *User = cast<Instruction>(U.getUser()); +  BasicBlock *UseBB = User->getParent(); +  if (PHINode *UserPN = dyn_cast<PHINode>(User)) +    UseBB = UserPN->getIncomingBlock(U); +   +  U.set(GetValueInMiddleOfBlock(UseBB)); +} + + +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it.  If not, construct SSA form by +/// walking predecessors inserting PHI nodes as needed until we get to a block +/// where the value is available. +/// +Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { +  AvailableValsTy &AvailableVals = getAvailableVals(AV); +   +  // Query AvailableVals by doing an insertion of null. +  std::pair<AvailableValsTy::iterator, bool> InsertRes = +  AvailableVals.insert(std::make_pair(BB, WeakVH())); +   +  // Handle the case when the insertion fails because we have already seen BB. +  if (!InsertRes.second) { +    // If the insertion failed, there are two cases.  The first case is that the +    // value is already available for the specified block.  If we get this, just +    // return the value. 
+    if (InsertRes.first->second != 0) +      return InsertRes.first->second; +     +    // Otherwise, if the value we find is null, then this is the value is not +    // known but it is being computed elsewhere in our recursion.  This means +    // that we have a cycle.  Handle this by inserting a PHI node and returning +    // it.  When we get back to the first instance of the recursion we will fill +    // in the PHI node. +    return InsertRes.first->second = +    PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(), +                    &BB->front()); +  } +   +  // Okay, the value isn't in the map and we just inserted a null in the entry +  // to indicate that we're processing the block.  Since we have no idea what +  // value is in this block, we have to recurse through our predecessors. +  // +  // While we're walking our predecessors, we keep track of them in a vector, +  // then insert a PHI node in the end if we actually need one.  We could use a +  // smallvector here, but that would take a lot of stack space for every level +  // of the recursion, just use IncomingPredInfo as an explicit stack. +  IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI); +  unsigned FirstPredInfoEntry = IncomingPredInfo.size(); +   +  // As we're walking the predecessors, keep track of whether they are all +  // producing the same value.  If so, this value will capture it, if not, it +  // will get reset to null.  We distinguish the no-predecessor case explicitly +  // below. +  TrackingVH<Value> SingularValue; +   +  // We can get our predecessor info by walking the pred_iterator list, but it +  // is relatively slow.  If we already have PHI nodes in this block, walk one +  // of them to get the predecessor list instead. 
+  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) { +    for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { +      BasicBlock *PredBB = SomePhi->getIncomingBlock(i); +      Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); +      IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); +       +      // Compute SingularValue. +      if (i == 0) +        SingularValue = PredVal; +      else if (PredVal != SingularValue) +        SingularValue = 0; +    } +  } else { +    bool isFirstPred = true; +    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { +      BasicBlock *PredBB = *PI; +      Value *PredVal = GetValueAtEndOfBlockInternal(PredBB); +      IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal)); +       +      // Compute SingularValue. +      if (isFirstPred) { +        SingularValue = PredVal; +        isFirstPred = false; +      } else if (PredVal != SingularValue) +        SingularValue = 0; +    } +  } +   +  // If there are no predecessors, then we must have found an unreachable block +  // just return 'undef'.  Since there are no predecessors, InsertRes must not +  // be invalidated. +  if (IncomingPredInfo.size() == FirstPredInfoEntry) +    return InsertRes.first->second = UndefValue::get(PrototypeValue->getType()); +   +  /// Look up BB's entry in AvailableVals.  'InsertRes' may be invalidated.  If +  /// this block is involved in a loop, a no-entry PHI node will have been +  /// inserted as InsertedVal.  Otherwise, we'll still have the null we inserted +  /// above. +  TrackingVH<Value> &InsertedVal = AvailableVals[BB]; +   +  // If all the predecessor values are the same then we don't need to insert a +  // PHI.  This is the simple and common case. +  if (SingularValue) { +    // If a PHI node got inserted, replace it with the singlar value and delete +    // it. +    if (InsertedVal) { +      PHINode *OldVal = cast<PHINode>(InsertedVal); +      // Be careful about dead loops.  
These RAUW's also update InsertedVal. +      if (InsertedVal != SingularValue) +        OldVal->replaceAllUsesWith(SingularValue); +      else +        OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType())); +      OldVal->eraseFromParent(); +    } else { +      InsertedVal = SingularValue; +    } +     +    // Drop the entries we added in IncomingPredInfo to restore the stack. +    IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, +                           IncomingPredInfo.end()); +    return InsertedVal; +  } +   +  // Otherwise, we do need a PHI: insert one now if we don't already have one. +  if (InsertedVal == 0) +    InsertedVal = PHINode::Create(PrototypeValue->getType(), +                                  PrototypeValue->getName(), &BB->front()); +   +  PHINode *InsertedPHI = cast<PHINode>(InsertedVal); +  InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry); +   +  // Fill in all the predecessors of the PHI. +  for (IncomingPredInfoTy::iterator I = +         IncomingPredInfo.begin()+FirstPredInfoEntry, +       E = IncomingPredInfo.end(); I != E; ++I) +    InsertedPHI->addIncoming(I->second, I->first); +   +  // Drop the entries we added in IncomingPredInfo to restore the stack. +  IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry, +                         IncomingPredInfo.end()); +   +  // See if the PHI node can be merged to a single value.  This can happen in +  // loop cases when we get a PHI of itself and one other value. +  if (Value *ConstVal = InsertedPHI->hasConstantValue()) { +    InsertedPHI->replaceAllUsesWith(ConstVal); +    InsertedPHI->eraseFromParent(); +    InsertedVal = ConstVal; +  } else { +    DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n"); +     +    // If the client wants to know about all new instructions, tell it. 
+    if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); +  } +   +  return InsertedVal; +} + + diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp index 4c4dd37ddf75..3bb2e8ee6911 100644 --- a/lib/Transforms/Utils/SSI.cpp +++ b/lib/Transforms/Utils/SSI.cpp @@ -23,6 +23,7 @@  #include "llvm/Transforms/Scalar.h"  #include "llvm/Transforms/Utils/SSI.h" +#include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/Dominators.h"  using namespace llvm; @@ -30,11 +31,12 @@ using namespace llvm;  static const std::string SSI_PHI = "SSI_phi";  static const std::string SSI_SIG = "SSI_sigma"; -static const unsigned UNSIGNED_INFINITE = ~0U; +STATISTIC(NumSigmaInserted, "Number of sigma functions inserted"); +STATISTIC(NumPhiInserted, "Number of phi functions inserted");  void SSI::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.addRequired<DominanceFrontier>(); -  AU.addRequired<DominatorTree>(); +  AU.addRequiredTransitive<DominanceFrontier>(); +  AU.addRequiredTransitive<DominatorTree>();    AU.setPreservesAll();  } @@ -45,22 +47,23 @@ bool SSI::runOnFunction(Function &F) {  /// This methods creates the SSI representation for the list of values  /// received. It will only create SSI representation if a value is used -/// in a to decide a branch. Repeated values are created only once. +/// to decide a branch. Repeated values are created only once.  
///  void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {    init(value); -  for (unsigned i = 0; i < num_values; ++i) { -    if (created.insert(value[i])) { -      needConstruction[i] = true; -    } -  } -  insertSigmaFunctions(value); +  SmallPtrSet<Instruction*, 4> needConstruction; +  for (SmallVectorImpl<Instruction*>::iterator I = value.begin(), +       E = value.end(); I != E; ++I) +    if (created.insert(*I)) +      needConstruction.insert(*I); + +  insertSigmaFunctions(needConstruction);    // Test if there is a need to transform to SSI -  if (needConstruction.any()) { -    insertPhiFunctions(value); -    renameInit(value); +  if (!needConstruction.empty()) { +    insertPhiFunctions(needConstruction); +    renameInit(needConstruction);      rename(DT_->getRoot());      fixPhis();    } @@ -71,100 +74,107 @@ void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {  /// Insert sigma functions (a sigma function is a phi function with one  /// operator)  /// -void SSI::insertSigmaFunctions(SmallVectorImpl<Instruction *> &value) { -  for (unsigned i = 0; i < num_values; ++i) { -    if (!needConstruction[i]) -      continue; - -    bool need = false; -    for (Value::use_iterator begin = value[i]->use_begin(), end = -         value[i]->use_end(); begin != end; ++begin) { +void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) { +  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(), +       E = value.end(); I != E; ++I) { +    for (Value::use_iterator begin = (*I)->use_begin(), +         end = (*I)->use_end(); begin != end; ++begin) {        // Test if the Use of the Value is in a comparator -      CmpInst *CI = dyn_cast<CmpInst>(begin); -      if (CI && isUsedInTerminator(CI)) { -        // Basic Block of the Instruction -        BasicBlock *BB = CI->getParent(); -        // Last Instruction of the Basic Block -        const TerminatorInst *TI = BB->getTerminator(); - -        for (unsigned j = 0, e = TI->getNumSuccessors(); j 
< e; ++j) { -          // Next Basic Block -          BasicBlock *BB_next = TI->getSuccessor(j); -          if (BB_next != BB && -              BB_next->getUniquePredecessor() != NULL && -              dominateAny(BB_next, value[i])) { -            PHINode *PN = PHINode::Create( -                value[i]->getType(), SSI_SIG, BB_next->begin()); -            PN->addIncoming(value[i], BB); -            sigmas.insert(std::make_pair(PN, i)); -            created.insert(PN); -            need = true; -            defsites[i].push_back(BB_next); +      if (CmpInst *CI = dyn_cast<CmpInst>(begin)) { +        // Iterates through all uses of CmpInst +        for (Value::use_iterator begin_ci = CI->use_begin(), +             end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) { +          // Test if any use of CmpInst is in a Terminator +          if (TerminatorInst *TI = dyn_cast<TerminatorInst>(begin_ci)) { +            insertSigma(TI, *I);            }          }        }      } -    needConstruction[i] = need; +  } +} + +/// Inserts Sigma Functions in every BasicBlock successor to Terminator +/// Instruction TI. All inserted Sigma Function are related to Instruction I. 
+/// +void SSI::insertSigma(TerminatorInst *TI, Instruction *I) { +  // Basic Block of the Terminator Instruction +  BasicBlock *BB = TI->getParent(); +  for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { +    // Next Basic Block +    BasicBlock *BB_next = TI->getSuccessor(i); +    if (BB_next != BB && +        BB_next->getSinglePredecessor() != NULL && +        dominateAny(BB_next, I)) { +      PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin()); +      PN->addIncoming(I, BB); +      sigmas[PN] = I; +      created.insert(PN); +      defsites[I].push_back(BB_next); +      ++NumSigmaInserted; +    }    }  }  /// Insert phi functions when necessary  /// -void SSI::insertPhiFunctions(SmallVectorImpl<Instruction *> &value) { +void SSI::insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value) {    DominanceFrontier *DF = &getAnalysis<DominanceFrontier>(); -  for (unsigned i = 0; i < num_values; ++i) { +  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(), +       E = value.end(); I != E; ++I) {      // Test if there were any sigmas for this variable -    if (needConstruction[i]) { - -      SmallPtrSet<BasicBlock *, 1> BB_visited; - -      // Insert phi functions if there is any sigma function -      while (!defsites[i].empty()) { - -        BasicBlock *BB = defsites[i].back(); - -        defsites[i].pop_back(); -        DominanceFrontier::iterator DF_BB = DF->find(BB); - -        // Iterates through all the dominance frontier of BB -        for (std::set<BasicBlock *>::iterator DF_BB_begin = -             DF_BB->second.begin(), DF_BB_end = DF_BB->second.end(); -             DF_BB_begin != DF_BB_end; ++DF_BB_begin) { -          BasicBlock *BB_dominated = *DF_BB_begin; - -          // Test if has not yet visited this node and if the -          // original definition dominates this node -          if (BB_visited.insert(BB_dominated) && -              DT_->properlyDominates(value_original[i], BB_dominated) && -              
dominateAny(BB_dominated, value[i])) { -            PHINode *PN = PHINode::Create( -                value[i]->getType(), SSI_PHI, BB_dominated->begin()); -            phis.insert(std::make_pair(PN, i)); -            created.insert(PN); - -            defsites[i].push_back(BB_dominated); -          } +    SmallPtrSet<BasicBlock *, 16> BB_visited; + +    // Insert phi functions if there is any sigma function +    while (!defsites[*I].empty()) { + +      BasicBlock *BB = defsites[*I].back(); + +      defsites[*I].pop_back(); +      DominanceFrontier::iterator DF_BB = DF->find(BB); + +      // The BB is unreachable. Skip it. +      if (DF_BB == DF->end()) +        continue;  + +      // Iterates through all the dominance frontier of BB +      for (std::set<BasicBlock *>::iterator DF_BB_begin = +           DF_BB->second.begin(), DF_BB_end = DF_BB->second.end(); +           DF_BB_begin != DF_BB_end; ++DF_BB_begin) { +        BasicBlock *BB_dominated = *DF_BB_begin; + +        // Test if has not yet visited this node and if the +        // original definition dominates this node +        if (BB_visited.insert(BB_dominated) && +            DT_->properlyDominates(value_original[*I], BB_dominated) && +            dominateAny(BB_dominated, *I)) { +          PHINode *PN = PHINode::Create( +              (*I)->getType(), SSI_PHI, BB_dominated->begin()); +          phis.insert(std::make_pair(PN, *I)); +          created.insert(PN); + +          defsites[*I].push_back(BB_dominated); +          ++NumPhiInserted;          }        } -      BB_visited.clear();      } +    BB_visited.clear();    }  }  /// Some initialization for the rename part  /// -void SSI::renameInit(SmallVectorImpl<Instruction *> &value) { -  value_stack.resize(num_values); -  for (unsigned i = 0; i < num_values; ++i) { -    value_stack[i].push_back(value[i]); -  } +void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) { +  for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(), +       E = 
value.end(); I != E; ++I) +    value_stack[*I].push_back(*I);  }  /// Renames all variables in the specified BasicBlock.  /// Only variables that need to be rename will be.  ///  void SSI::rename(BasicBlock *BB) { -  BitVector *defined = new BitVector(num_values, false); +  SmallPtrSet<Instruction*, 8> defined;    // Iterate through instructions and make appropriate renaming.    // For SSI_PHI (b = PHI()), store b at value_stack as a new @@ -178,19 +188,17 @@ void SSI::rename(BasicBlock *BB) {         begin != end; ++begin) {      Instruction *I = begin;      if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions -      int position; +      Instruction* position;        // Treat SSI_PHI -      if ((position = getPositionPhi(PN)) != -1) { +      if ((position = getPositionPhi(PN))) {          value_stack[position].push_back(PN); -        (*defined)[position] = true; -      } - +        defined.insert(position);        // Treat SSI_SIG -      else if ((position = getPositionSigma(PN)) != -1) { +      } else if ((position = getPositionSigma(PN))) {          substituteUse(I);          value_stack[position].push_back(PN); -        (*defined)[position] = true; +        defined.insert(position);        }        // Treat all other PHI functions @@ -216,10 +224,9 @@ void SSI::rename(BasicBlock *BB) {      for (BasicBlock::iterator begin = BB_succ->begin(),           notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) {        Instruction *I = begin; -      PHINode *PN; -      int position; -      if ((PN = dyn_cast<PHINode>(I)) && ((position -          = getPositionPhi(PN)) != -1)) { +      PHINode *PN = dyn_cast<PHINode>(I); +      Instruction* position; +      if (PN && ((position = getPositionPhi(PN)))) {          PN->addIncoming(value_stack[position].back(), BB);        }      } @@ -237,13 +244,9 @@ void SSI::rename(BasicBlock *BB) {    // Now we remove all inserted definitions of a variable from the top of    // the stack leaving the previous one 
as the top. -  if (defined->any()) { -    for (unsigned i = 0; i < num_values; ++i) { -      if ((*defined)[i]) { -        value_stack[i].pop_back(); -      } -    } -  } +  for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(), +       DE = defined.end(); DI != DE; ++DI) +    value_stack[*DI].pop_back();  }  /// Substitute any use in this instruction for the last definition of @@ -252,23 +255,24 @@ void SSI::rename(BasicBlock *BB) {  void SSI::substituteUse(Instruction *I) {    for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {      Value *operand = I->getOperand(i); -    for (unsigned j = 0; j < num_values; ++j) { -      if (operand == value_stack[j].front() && -          I != value_stack[j].back()) { +    for (DenseMap<Instruction*, SmallVector<Instruction*, 1> >::iterator +         VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) { +      if (operand == VI->second.front() && +          I != VI->second.back()) {          PHINode *PN_I = dyn_cast<PHINode>(I); -        PHINode *PN_vs = dyn_cast<PHINode>(value_stack[j].back()); +        PHINode *PN_vs = dyn_cast<PHINode>(VI->second.back());          // If a phi created in a BasicBlock is used as an operand of another          // created in the same BasicBlock, this step marks this second phi,          // to fix this issue later. It cannot be fixed now, because the          // operands of the first phi are not final yet.          if (PN_I && PN_vs && -            value_stack[j].back()->getParent() == I->getParent()) { +            VI->second.back()->getParent() == I->getParent()) {            phisToFix.insert(PN_I);          } -        I->setOperand(i, value_stack[j].back()); +        I->setOperand(i, VI->second.back());          break;        }      } @@ -276,12 +280,16 @@ void SSI::substituteUse(Instruction *I) {  }  /// Test if the BasicBlock BB dominates any use or definition of value. 
+/// If it dominates a phi instruction that is on the same BasicBlock, +/// that does not count.  ///  bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {    for (Value::use_iterator begin = value->use_begin(),         end = value->use_end(); begin != end; ++begin) {      Instruction *I = cast<Instruction>(*begin);      BasicBlock *BB_father = I->getParent(); +    if (BB == BB_father && isa<PHINode>(I)) +      continue;      if (DT_->dominates(BB, BB_father)) {        return true;      } @@ -293,31 +301,54 @@ bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {  /// as an operand of another phi function used in the same BasicBlock,  /// LLVM looks this as an error. So on the second phi, the first phi is called  /// P and the BasicBlock it incomes is B. This P will be replaced by the value -/// it has for BasicBlock B. +/// it has for BasicBlock B. It also includes undef values for predecessors +/// that were not included in the phi.  ///  void SSI::fixPhis() {    for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(),         end = phisToFix.end(); begin != end; ++begin) {      PHINode *PN = *begin;      for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) { -      PHINode *PN_father; -      if ((PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i))) && -          PN->getParent() == PN_father->getParent()) { +      PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i)); +      if (PN_father && PN->getParent() == PN_father->getParent() && +          !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) {          BasicBlock *BB = PN->getIncomingBlock(i);          int pos = PN_father->getBasicBlockIndex(BB);          PN->setIncomingValue(i, PN_father->getIncomingValue(pos));        }      }    } + +  for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(), +       end = phis.end(); begin != end; ++begin) { +    PHINode *PN = begin->first; +    BasicBlock *BB = PN->getParent(); +    pred_iterator PI = 
pred_begin(BB), PE = pred_end(BB); +    SmallVector<BasicBlock*, 8> Preds(PI, PE); +    for (unsigned size = Preds.size(); +         PI != PE && PN->getNumIncomingValues() != size; ++PI) { +      bool found = false; +      for (unsigned i = 0, pn_end = PN->getNumIncomingValues(); +           i < pn_end; ++i) { +        if (PN->getIncomingBlock(i) == *PI) { +          found = true; +          break; +        } +      } +      if (!found) { +        PN->addIncoming(UndefValue::get(PN->getType()), *PI); +      } +    } +  }  }  /// Return which variable (position on the vector of variables) this phi  /// represents on the phis list.  /// -unsigned SSI::getPositionPhi(PHINode *PN) { -  DenseMap<PHINode *, unsigned>::iterator val = phis.find(PN); +Instruction* SSI::getPositionPhi(PHINode *PN) { +  DenseMap<PHINode *, Instruction*>::iterator val = phis.find(PN);    if (val == phis.end()) -    return UNSIGNED_INFINITE; +    return 0;    else      return val->second;  } @@ -325,52 +356,27 @@ unsigned SSI::getPositionPhi(PHINode *PN) {  /// Return which variable (position on the vector of variables) this phi  /// represents on the sigmas list.  /// -unsigned SSI::getPositionSigma(PHINode *PN) { -  DenseMap<PHINode *, unsigned>::iterator val = sigmas.find(PN); +Instruction* SSI::getPositionSigma(PHINode *PN) { +  DenseMap<PHINode *, Instruction*>::iterator val = sigmas.find(PN);    if (val == sigmas.end()) -    return UNSIGNED_INFINITE; +    return 0;    else      return val->second;  } -/// Return true if the the Comparison Instruction is an operator -/// of the Terminator instruction of its Basic Block. 
-/// -unsigned SSI::isUsedInTerminator(CmpInst *CI) { -  TerminatorInst *TI = CI->getParent()->getTerminator(); -  if (TI->getNumOperands() == 0) { -    return false; -  } else if (CI == TI->getOperand(0)) { -    return true; -  } else { -    return false; -  } -} -  /// Initializes  ///  void SSI::init(SmallVectorImpl<Instruction *> &value) { -  num_values = value.size(); -  needConstruction.resize(num_values, false); - -  value_original.resize(num_values); -  defsites.resize(num_values); - -  for (unsigned i = 0; i < num_values; ++i) { -    value_original[i] = value[i]->getParent(); -    defsites[i].push_back(value_original[i]); +  for (SmallVectorImpl<Instruction *>::iterator I = value.begin(), +       E = value.end(); I != E; ++I) { +    value_original[*I] = (*I)->getParent(); +    defsites[*I].push_back((*I)->getParent());    }  }  /// Clean all used resources in this creation of SSI  ///  void SSI::clean() { -  for (unsigned i = 0; i < num_values; ++i) { -    defsites[i].clear(); -    if (i < value_stack.size()) -      value_stack[i].clear(); -  } -    phis.clear();    sigmas.clear();    phisToFix.clear(); @@ -378,7 +384,6 @@ void SSI::clean() {    defsites.clear();    value_stack.clear();    value_original.clear(); -  needConstruction.clear();  }  /// createSSIPass - The public interface to this file... @@ -388,3 +393,40 @@ FunctionPass *llvm::createSSIPass() { return new SSI(); }  char SSI::ID = 0;  static RegisterPass<SSI> X("ssi", "Static Single Information Construction"); +/// SSIEverything - A pass that runs createSSI on every non-void variable, +/// intended for debugging. 
+namespace { +  struct VISIBILITY_HIDDEN SSIEverything : public FunctionPass { +    static char ID; // Pass identification, replacement for typeid +    SSIEverything() : FunctionPass(&ID) {} + +    bool runOnFunction(Function &F); + +    virtual void getAnalysisUsage(AnalysisUsage &AU) const { +      AU.addRequired<SSI>(); +    } +  }; +} + +bool SSIEverything::runOnFunction(Function &F) { +  SmallVector<Instruction *, 16> Insts; +  SSI &ssi = getAnalysis<SSI>(); + +  if (F.isDeclaration() || F.isIntrinsic()) return false; + +  for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) +    for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) +      if (I->getType() != Type::getVoidTy(F.getContext())) +        Insts.push_back(I); + +  ssi.createSSI(Insts); +  return true; +} + +/// createSSIEverythingPass - The public interface to this file... +/// +FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); } + +char SSIEverything::ID = 0; +static RegisterPass<SSIEverything> +Y("ssi-everything", "Static Single Information Construction"); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 58d4d5a344c1..6fd7d7bf9aea 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -21,6 +21,7 @@  #include "llvm/GlobalVariable.h"  #include "llvm/Support/CFG.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Analysis/ConstantFolding.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/ADT/SmallVector.h" @@ -84,19 +85,12 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,  static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {    assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); -  DOUT << "Looking to fold " << BB->getNameStart() << " into "  -       << Succ->getNameStart() << "\n"; +  DEBUG(errs() << "Looking to fold " << 
BB->getName() << " into "  +        << Succ->getName() << "\n");    // Shortcut, if there is only a single predecessor it must be BB and merging    // is always safe    if (Succ->getSinglePredecessor()) return true; -  typedef SmallPtrSet<Instruction*, 16> InstrSet; -  InstrSet BBPHIs; - -  // Make a list of all phi nodes in BB -  BasicBlock::iterator BBI = BB->begin(); -  while (isa<PHINode>(*BBI)) BBPHIs.insert(BBI++); -    // Make a list of the predecessors of BB    typedef SmallPtrSet<BasicBlock*, 16> BlockSet;    BlockSet BBPreds(pred_begin(BB), pred_end(BB)); @@ -126,16 +120,13 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {              PI != PE; PI++) {          if (BBPN->getIncomingValueForBlock(*PI)                 != PN->getIncomingValueForBlock(*PI)) { -          DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in "  -               << Succ->getNameStart() << " is conflicting with "  -               << BBPN->getNameStart() << " with regard to common predecessor " -               << (*PI)->getNameStart() << "\n"; +          DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "  +                << Succ->getName() << " is conflicting with "  +                << BBPN->getName() << " with regard to common predecessor " +                << (*PI)->getName() << "\n");            return false;          }        } -      // Remove this phinode from the list of phis in BB, since it has been -      // handled. -      BBPHIs.erase(BBPN);      } else {        Value* Val = PN->getIncomingValueForBlock(BB);        for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); @@ -144,33 +135,15 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {          // one for BB, in which case this phi node will not prevent the merging          // of the block.          
if (Val != PN->getIncomingValueForBlock(*PI)) { -          DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in "  -          << Succ->getNameStart() << " is conflicting with regard to common " -          << "predecessor " << (*PI)->getNameStart() << "\n"; +          DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "  +                << Succ->getName() << " is conflicting with regard to common " +                << "predecessor " << (*PI)->getName() << "\n");            return false;          }        }      }    } -  // If there are any other phi nodes in BB that don't have a phi node in Succ -  // to merge with, they must be moved to Succ completely. However, for any -  // predecessors of Succ, branches will be added to the phi node that just -  // point to itself. So, for any common predecessors, this must not cause -  // conflicts. -  for (InstrSet::iterator I = BBPHIs.begin(), E = BBPHIs.end(); -        I != E; I++) { -    PHINode *PN = cast<PHINode>(*I); -    for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end(); -          PI != PE; PI++) -      if (PN->getIncomingValueForBlock(*PI) != PN) { -        DOUT << "Can't fold, phi node " << *PN->getNameStart() << " in "  -             << BB->getNameStart() << " is conflicting with regard to common " -             << "predecessor " << (*PI)->getNameStart() << "\n"; -        return false; -      } -  } -    return true;  } @@ -182,8 +155,36 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,    // Check to see if merging these blocks would cause conflicts for any of the    // phi nodes in BB or Succ. If not, we can safely merge.    if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false; -   -  DOUT << "Killing Trivial BB: \n" << *BB; + +  // Check for cases where Succ has multiple predecessors and a PHI node in BB +  // has uses which will not disappear when the PHI nodes are merged.  
It is +  // possible to handle such cases, but difficult: it requires checking whether +  // BB dominates Succ, which is non-trivial to calculate in the case where +  // Succ has multiple predecessors.  Also, it requires checking whether +  // constructing the necessary self-referential PHI node doesn't introduce any +  // conflicts; this isn't too difficult, but the previous code for doing this +  // was incorrect. +  // +  // Note that if this check finds a live use, BB dominates Succ, so BB is +  // something like a loop pre-header (or rarely, a part of an irreducible CFG); +  // folding the branch isn't profitable in that case anyway. +  if (!Succ->getSinglePredecessor()) { +    BasicBlock::iterator BBI = BB->begin(); +    while (isa<PHINode>(*BBI)) { +      for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end(); +           UI != E; ++UI) { +        if (PHINode* PN = dyn_cast<PHINode>(*UI)) { +          if (PN->getIncomingBlock(UI) != BB) +            return false; +        } else { +          return false; +        } +      } +      ++BBI; +    } +  } + +  DEBUG(errs() << "Killing Trivial BB: \n" << *BB);    if (isa<PHINode>(Succ->begin())) {      // If there is more than one pred of succ, and there are PHI nodes in @@ -217,38 +218,16 @@ static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,      }    } -  if (isa<PHINode>(&BB->front())) { -    SmallVector<BasicBlock*, 16> -    OldSuccPreds(pred_begin(Succ), pred_end(Succ)); -     -    // Move all PHI nodes in BB to Succ if they are alive, otherwise -    // delete them. -    while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { -      if (PN->use_empty()) { -        // Just remove the dead phi.  This happens if Succ's PHIs were the only -        // users of the PHI nodes. 
-        PN->eraseFromParent(); -        continue; -      } -     -      // The instruction is alive, so this means that BB must dominate all -      // predecessors of Succ (Since all uses of the PN are after its -      // definition, so in Succ or a block dominated by Succ. If a predecessor -      // of Succ would not be dominated by BB, PN would violate the def before -      // use SSA demand). Therefore, we can simply move the phi node to the -      // next block. +  while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) { +    if (Succ->getSinglePredecessor()) { +      // BB is the only predecessor of Succ, so Succ will end up with exactly +      // the same predecessors BB had.        Succ->getInstList().splice(Succ->begin(),                                   BB->getInstList(), BB->begin()); -       -      // We need to add new entries for the PHI node to account for -      // predecessors of Succ that the PHI node does not take into -      // account.  At this point, since we know that BB dominated succ and all -      // of its predecessors, this means that we should any newly added -      // incoming edges should use the PHI node itself as the value for these -      // edges, because they are loop back edges. -      for (unsigned i = 0, e = OldSuccPreds.size(); i != e; ++i) -        if (OldSuccPreds[i] != BB) -          PN->addIncoming(PN, OldSuccPreds[i]); +    } else { +      // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. +      assert(PN->use_empty() && "There shouldn't be any uses here!"); +      PN->eraseFromParent();      }    } @@ -383,26 +362,15 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,        // Okay, it looks like the instruction IS in the "condition".  Check to        // see if its a cheap instruction to unconditionally compute, and if it        // only uses stuff defined outside of the condition.  If so, hoist it out. 
+      if (!I->isSafeToSpeculativelyExecute()) +        return false; +        switch (I->getOpcode()) {        default: return false;  // Cannot hoist this out safely.        case Instruction::Load: { -        // We can hoist loads that are non-volatile and obviously cannot trap. -        if (cast<LoadInst>(I)->isVolatile()) -          return false; -        // FIXME: A computation of a constant can trap! -        if (!isa<AllocaInst>(I->getOperand(0)) && -            !isa<Constant>(I->getOperand(0))) -          return false; -        // External weak globals may have address 0, so we can't load them. -        Value *V2 = I->getOperand(0)->getUnderlyingObject(); -        if (V2) { -          GlobalVariable* GV = dyn_cast<GlobalVariable>(V2); -          if (GV && GV->hasExternalWeakLinkage()) -            return false; -        } -        // Finally, we have to check to make sure there are no instructions -        // before the load in its basic block, as we are going to hoist the loop -        // out to its predecessor. +        // We have to check to make sure there are no instructions before the +        // load in its basic block, as we are going to hoist the load out to +        // its predecessor.          BasicBlock::iterator IP = PBB->begin();          while (isa<DbgInfoIntrinsic>(IP))            IP++; @@ -645,12 +613,13 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,          assert(ThisCases.size() == 1 && "Branch can only have one case!");          // Insert the new branch.          Instruction *NI = BranchInst::Create(ThisDef, TI); +        (void) NI;          // Remove PHI node entries for the dead edge.          
ThisCases[0].second->removePredecessor(TI->getParent()); -        DOUT << "Threading pred instr: " << *Pred->getTerminator() -             << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"; +        DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() +             << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");          EraseTerminatorInstAndDCECond(TI);          return true; @@ -662,8 +631,8 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,          for (unsigned i = 0, e = PredCases.size(); i != e; ++i)            DeadCases.insert(PredCases[i].first); -        DOUT << "Threading pred instr: " << *Pred->getTerminator() -             << "Through successor TI: " << *TI; +        DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() +                     << "Through successor TI: " << *TI);          for (unsigned i = SI->getNumCases()-1; i != 0; --i)            if (DeadCases.count(SI->getCaseValue(i))) { @@ -671,7 +640,7 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,              SI->removeCase(i);            } -        DOUT << "Leaving: " << *TI << "\n"; +        DEBUG(errs() << "Leaving: " << *TI << "\n");          return true;        }      } @@ -712,9 +681,10 @@ static bool SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,      // Insert the new branch.      
Instruction *NI = BranchInst::Create(TheRealDest, TI); +    (void) NI; -    DOUT << "Threading pred instr: " << *Pred->getTerminator() -         << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"; +    DEBUG(errs() << "Threading pred instr: " << *Pred->getTerminator() +              << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n");      EraseTerminatorInstAndDCECond(TI);      return true; @@ -847,7 +817,8 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {            if (InfLoopBlock == 0) {              // Insert it at the end of the function, because it's either code,              // or it won't matter if it's hot. :) -            InfLoopBlock = BasicBlock::Create("infloop", BB->getParent()); +            InfLoopBlock = BasicBlock::Create(BB->getContext(), +                                              "infloop", BB->getParent());              BranchInst::Create(InfLoopBlock, InfLoopBlock);            }            NewSI->setSuccessor(i, InfLoopBlock); @@ -900,7 +871,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {    while (isa<DbgInfoIntrinsic>(I2))      I2 = BB2_Itr++;    if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) || -      !I1->isIdenticalTo(I2) || +      !I1->isIdenticalToWhenDefined(I2) ||        (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))      return false; @@ -919,6 +890,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {      BIParent->getInstList().splice(BI, BB1->getInstList(), I1);      if (!I2->use_empty())        I2->replaceAllUsesWith(I1); +    I1->intersectOptionalDataWith(I2);      BB2->getInstList().erase(I2);      I1 = BB1_Itr++; @@ -927,7 +899,8 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {      I2 = BB2_Itr++;      while (isa<DbgInfoIntrinsic>(I2))        I2 = BB2_Itr++; -  } while (I1->getOpcode() == I2->getOpcode() && I1->isIdenticalTo(I2)); +  } while (I1->getOpcode() == I2->getOpcode() && +           
I1->isIdenticalToWhenDefined(I2));    return true; @@ -939,7 +912,7 @@ HoistTerminator:    // Okay, it is safe to hoist the terminator.    Instruction *NT = I1->clone();    BIParent->getInstList().insert(BI, NT); -  if (NT->getType() != Type::VoidTy) { +  if (NT->getType() != Type::getVoidTy(BB1->getContext())) {      I1->replaceAllUsesWith(NT);      I2->replaceAllUsesWith(NT);      NT->takeName(I1); @@ -1197,7 +1170,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {      ConstantInt *CB;      if ((CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i))) && -        CB->getType() == Type::Int1Ty) { +        CB->getType() == Type::getInt1Ty(BB->getContext())) {        // Okay, we now know that all edges from PredBB should be revectored to        // branch to RealDest.        BasicBlock *PredBB = PN->getIncomingBlock(i); @@ -1209,7 +1182,8 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {        // difficult cases.  Instead of being smart about this, just insert a new        // block that jumps to the destination block, effectively splitting        // the edge we are about to create. -      BasicBlock *EdgeBB = BasicBlock::Create(RealDest->getName()+".critedge", +      BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), +                                              RealDest->getName()+".critedge",                                                RealDest->getParent(), RealDest);        BranchInst::Create(RealDest, EdgeBB);        PHINode *PN; @@ -1242,7 +1216,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {            }            // Check for trivial simplification. 
-          if (Constant *C = ConstantFoldInstruction(N)) { +          if (Constant *C = ConstantFoldInstruction(N, BB->getContext())) {              TranslateMap[BBI] = C;              delete N;   // Constant folded away, don't need actual inst            } else { @@ -1296,8 +1270,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN) {      if (NumPhis > 2)        return false; -  DOUT << "FOUND IF CONDITION!  " << *IfCond << "  T: " -       << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n"; +  DEBUG(errs() << "FOUND IF CONDITION!  " << *IfCond << "  T: " +        << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n");    // Loop over the PHI's seeing if we can promote them all to select    // instructions.  While we are at it, keep track of the instructions @@ -1427,7 +1401,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {    if (FalseRet->getNumOperands() == 0) {      TrueSucc->removePredecessor(BI->getParent());      FalseSucc->removePredecessor(BI->getParent()); -    ReturnInst::Create(0, BI); +    ReturnInst::Create(BI->getContext(), 0, BI);      EraseTerminatorInstAndDCECond(BI);      return true;    } @@ -1476,12 +1450,13 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {    }    Value *RI = !TrueValue ? 
-              ReturnInst::Create(BI) : -              ReturnInst::Create(TrueValue, BI); +              ReturnInst::Create(BI->getContext(), BI) : +              ReturnInst::Create(BI->getContext(), TrueValue, BI); +  (void) RI; -  DOUT << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" -       << "\n  " << *BI << "NewRet = " << *RI -       << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc; +  DEBUG(errs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" +               << "\n  " << *BI << "NewRet = " << *RI +               << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: "<< *FalseSucc);    EraseTerminatorInstAndDCECond(BI); @@ -1561,7 +1536,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {      else        continue; -    DOUT << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB; +    DEBUG(errs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);      // If we need to invert the condition in the pred block to match, do so now.      if (InvertPredCond) { @@ -1605,7 +1580,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {  static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {    assert(PBI->isConditional() && BI->isConditional());    BasicBlock *BB = BI->getParent(); -   +    // If this block ends with a branch instruction, and if there is a    // predecessor that ends on a branch of the same condition, make     // this conditional branch redundant. @@ -1616,7 +1591,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {      if (BB->getSinglePredecessor()) {        // Turn this into a branch on constant.        bool CondIsTrue = PBI->getSuccessor(0) == BB; -      BI->setCondition(ConstantInt::get(Type::Int1Ty, CondIsTrue)); +      BI->setCondition(ConstantInt::get(Type::getInt1Ty(BB->getContext()),  +                                        CondIsTrue));        return true;  // Nuke the branch on constant.      
} @@ -1624,7 +1600,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {      // in the constant and simplify the block result.  Subsequent passes of      // simplifycfg will thread the block.      if (BlockIsSimpleEnoughToThreadThrough(BB)) { -      PHINode *NewPN = PHINode::Create(Type::Int1Ty, +      PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()),                                         BI->getCondition()->getName() + ".pr",                                         BB->begin());        // Okay, we're going to insert the PHI node.  Since PBI is not the only @@ -1636,7 +1612,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {              PBI->getCondition() == BI->getCondition() &&              PBI->getSuccessor(0) != PBI->getSuccessor(1)) {            bool CondIsTrue = PBI->getSuccessor(0) == BB; -          NewPN->addIncoming(ConstantInt::get(Type::Int1Ty,  +          NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),                                                 CondIsTrue), *PI);          } else {            NewPN->addIncoming(BI->getCondition(), *PI); @@ -1694,8 +1670,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {    // Finally, if everything is ok, fold the branches to logical ops.    BasicBlock *OtherDest  = BI->getSuccessor(BIOp ^ 1); -  DOUT << "FOLDING BRs:" << *PBI->getParent() -       << "AND: " << *BI->getParent(); +  DEBUG(errs() << "FOLDING BRs:" << *PBI->getParent() +               << "AND: " << *BI->getParent());    // If OtherDest *is* BB, then BB is a basic block with a single conditional @@ -1708,12 +1684,13 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {    if (OtherDest == BB) {      // Insert it at the end of the function, because it's either code,      // or it won't matter if it's hot. 
:) -    BasicBlock *InfLoopBlock = BasicBlock::Create("infloop", BB->getParent()); +    BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), +                                                  "infloop", BB->getParent());      BranchInst::Create(InfLoopBlock, InfLoopBlock);      OtherDest = InfLoopBlock;    }   -  DOUT << *PBI->getParent()->getParent(); +  DEBUG(errs() << *PBI->getParent()->getParent());    // BI may have other predecessors.  Because of this, we leave    // it alone, but modify PBI. @@ -1763,9 +1740,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {      }    } -  DOUT << "INTO: " << *PBI->getParent(); -   -  DOUT << *PBI->getParent()->getParent(); +  DEBUG(errs() << "INTO: " << *PBI->getParent()); +  DEBUG(errs() << *PBI->getParent()->getParent());    // This basic block is probably dead.  We know it has at least    // one fewer predecessor. @@ -1792,7 +1768,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {    // Remove basic blocks that have no predecessors... or that just have themself    // as a predecessor.  These are unreachable.    if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) { -    DOUT << "Removing BB: \n" << *BB; +    DEBUG(errs() << "Removing BB: \n" << *BB);      DeleteDeadBlock(BB);      return true;    } @@ -1832,8 +1808,8 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {        if (!UncondBranchPreds.empty()) {          while (!UncondBranchPreds.empty()) {            BasicBlock *Pred = UncondBranchPreds.pop_back_val(); -          DOUT << "FOLDING: " << *BB -               << "INTO UNCOND BRANCH PRED: " << *Pred; +          DEBUG(errs() << "FOLDING: " << *BB +                       << "INTO UNCOND BRANCH PRED: " << *Pred);            Instruction *UncondBranch = Pred->getTerminator();            // Clone the return and add it to the end of the predecessor.            
Instruction *NewRet = RI->clone(); @@ -1884,33 +1860,26 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {    } else if (isa<UnwindInst>(BB->begin())) {      // Check to see if the first instruction in this block is just an unwind.      // If so, replace any invoke instructions which use this as an exception -    // destination with call instructions, and any unconditional branch -    // predecessor with an unwind. +    // destination with call instructions.      //      SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));      while (!Preds.empty()) {        BasicBlock *Pred = Preds.back(); -      if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) { -        if (BI->isUnconditional()) { -          Pred->getInstList().pop_back();  // nuke uncond branch -          new UnwindInst(Pred);            // Use unwind. -          Changed = true; -        } -      } else if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator())) +      if (InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()))          if (II->getUnwindDest() == BB) {            // Insert a new branch instruction before the invoke, because this -          // is now a fall through... +          // is now a fall through.            BranchInst *BI = BranchInst::Create(II->getNormalDest(), II);            Pred->getInstList().remove(II);   // Take out of symbol table -          // Insert the call now... +          // Insert the call now.            SmallVector<Value*,8> Args(II->op_begin()+3, II->op_end());            CallInst *CI = CallInst::Create(II->getCalledValue(),                                            Args.begin(), Args.end(),                                            II->getName(), BI);            CI->setCallingConv(II->getCallingConv());            CI->setAttributes(II->getAttributes()); -          // If the invoke produced a value, the Call now does instead +          // If the invoke produced a value, the Call now does instead.            
II->replaceAllUsesWith(CI);            delete II;            Changed = true; @@ -2042,7 +2011,7 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {          if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {            if (BI->isUnconditional()) {              if (BI->getSuccessor(0) == BB) { -              new UnreachableInst(TI); +              new UnreachableInst(TI->getContext(), TI);                TI->eraseFromParent();                Changed = true;              } diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 848f2b87c4ee..30cb94d90385 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -66,8 +66,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {    } else if (UnwindingBlocks.size() == 1) {      UnwindBlock = UnwindingBlocks.front();    } else { -    UnwindBlock = BasicBlock::Create("UnifiedUnwindBlock", &F); -    new UnwindInst(UnwindBlock); +    UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F); +    new UnwindInst(F.getContext(), UnwindBlock);      for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),             E = UnwindingBlocks.end(); I != E; ++I) { @@ -83,8 +83,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {    } else if (UnreachableBlocks.size() == 1) {      UnreachableBlock = UnreachableBlocks.front();    } else { -    UnreachableBlock = BasicBlock::Create("UnifiedUnreachableBlock", &F); -    new UnreachableInst(UnreachableBlock); +    UnreachableBlock = BasicBlock::Create(F.getContext(),  +                                          "UnifiedUnreachableBlock", &F); +    new UnreachableInst(F.getContext(), UnreachableBlock);      for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),             E = UnreachableBlocks.end(); I != E; ++I) { @@ -107,16 +108,17 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {    // nodes (if the 
function returns values), and convert all of the return    // instructions into unconditional branches.    // -  BasicBlock *NewRetBlock = BasicBlock::Create("UnifiedReturnBlock", &F); +  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), +                                               "UnifiedReturnBlock", &F);    PHINode *PN = 0; -  if (F.getReturnType() == Type::VoidTy) { -    ReturnInst::Create(NULL, NewRetBlock); +  if (F.getReturnType() == Type::getVoidTy(F.getContext())) { +    ReturnInst::Create(F.getContext(), NULL, NewRetBlock);    } else {      // If the function doesn't return void... add a PHI node to the block...      PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal");      NewRetBlock->getInstList().push_back(PN); -    ReturnInst::Create(PN, NewRetBlock); +    ReturnInst::Create(F.getContext(), PN, NewRetBlock);    }    // Loop over all of the blocks, replacing the return instruction with an diff --git a/lib/Transforms/Utils/UnrollLoop.cpp b/lib/Transforms/Utils/UnrollLoop.cpp index caef7ec5c45f..4d838b50e345 100644 --- a/lib/Transforms/Utils/UnrollLoop.cpp +++ b/lib/Transforms/Utils/UnrollLoop.cpp @@ -25,6 +25,7 @@  #include "llvm/Analysis/ConstantFolding.h"  #include "llvm/Analysis/LoopPass.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Transforms/Utils/Local.h" @@ -62,7 +63,7 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {    if (OnlyPred->getTerminator()->getNumSuccessors() != 1)      return 0; -  DOUT << "Merging: " << *BB << "into: " << *OnlyPred; +  DEBUG(errs() << "Merging: " << *BB << "into: " << *OnlyPred);    // Resolve any PHI nodes at the start of the block.  
They are all    // guaranteed to have exactly one entry if they exist, unless there are @@ -113,7 +114,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)    if (!BI || BI->isUnconditional()) {      // The loop-rotate pass can be helpful to avoid this in many cases. -    DOUT << "  Can't unroll; loop not terminated by a conditional branch.\n"; +    DEBUG(errs() << +             "  Can't unroll; loop not terminated by a conditional branch.\n");      return false;    } @@ -125,9 +127,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)      TripMultiple = L->getSmallConstantTripMultiple();    if (TripCount != 0) -    DOUT << "  Trip Count = " << TripCount << "\n"; +    DEBUG(errs() << "  Trip Count = " << TripCount << "\n");    if (TripMultiple != 1) -    DOUT << "  Trip Multiple = " << TripMultiple << "\n"; +    DEBUG(errs() << "  Trip Multiple = " << TripMultiple << "\n");    // Effectively "DCE" unrolled iterations that are beyond the tripcount    // and will never be executed. 
@@ -153,17 +155,17 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)    }    if (CompletelyUnroll) { -    DOUT << "COMPLETELY UNROLLING loop %" << Header->getName() -         << " with trip count " << TripCount << "!\n"; +    DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName() +          << " with trip count " << TripCount << "!\n");    } else { -    DOUT << "UNROLLING loop %" << Header->getName() -         << " by " << Count; +    DEBUG(errs() << "UNROLLING loop %" << Header->getName() +          << " by " << Count);      if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { -      DOUT << " with a breakout at trip " << BreakoutTrip; +      DEBUG(errs() << " with a breakout at trip " << BreakoutTrip);      } else if (TripMultiple != 1) { -      DOUT << " with " << TripMultiple << " trips per branch"; +      DEBUG(errs() << " with " << TripMultiple << " trips per branch");      } -    DOUT << "!\n"; +    DEBUG(errs() << "!\n");    }    std::vector<BasicBlock*> LoopBlocks = L->getBlocks(); @@ -349,7 +351,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)        if (isInstructionTriviallyDead(Inst))          (*BB)->getInstList().erase(Inst); -      else if (Constant *C = ConstantFoldInstruction(Inst)) { +      else if (Constant *C = ConstantFoldInstruction(Inst,  +                                                     Header->getContext())) {          Inst->replaceAllUsesWith(C);          (*BB)->getInstList().erase(Inst);        } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 20b676d0fb8d..2d8332f5252a 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -13,23 +13,27 @@  //===----------------------------------------------------------------------===//  #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/BasicBlock.h" +#include "llvm/DerivedTypes.h"  // For 
getNullValue(Type::Int32Ty)  #include "llvm/Constants.h"  #include "llvm/GlobalValue.h"  #include "llvm/Instruction.h" -#include "llvm/MDNode.h" +#include "llvm/LLVMContext.h" +#include "llvm/Metadata.h"  #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ErrorHandling.h"  using namespace llvm; -Value *llvm::MapValue(const Value *V, ValueMapTy &VM) { +Value *llvm::MapValue(const Value *V, ValueMapTy &VM, LLVMContext &Context) {    Value *&VMSlot = VM[V];    if (VMSlot) return VMSlot;      // Does it exist in the map yet?    // NOTE: VMSlot can be invalidated by any reference to VM, which can grow the    // DenseMap.  This includes any recursive calls to MapValue. -  // Global values do not need to be seeded into the ValueMap if they are using -  // the identity mapping. -  if (isa<GlobalValue>(V) || isa<InlineAsm>(V)) +  // Global values and metadata do not need to be seeded into the ValueMap if  +  // they are using the identity mapping. +  if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MetadataBase>(V))      return VMSlot = const_cast<Value*>(V);    if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) { @@ -40,7 +44,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {      else if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {        for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();             i != e; ++i) { -        Value *MV = MapValue(*i, VM); +        Value *MV = MapValue(*i, VM, Context);          if (MV != *i) {            // This array must contain a reference to a global, make a new array            // and return it. 
@@ -51,7 +55,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {              Values.push_back(cast<Constant>(*j));            Values.push_back(cast<Constant>(MV));            for (++i; i != e; ++i) -            Values.push_back(cast<Constant>(MapValue(*i, VM))); +            Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));            return VM[V] = ConstantArray::get(CA->getType(), Values);          }        } @@ -60,7 +64,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {      } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {        for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();             i != e; ++i) { -        Value *MV = MapValue(*i, VM); +        Value *MV = MapValue(*i, VM, Context);          if (MV != *i) {            // This struct must contain a reference to a global, make a new struct            // and return it. @@ -71,7 +75,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {              Values.push_back(cast<Constant>(*j));            Values.push_back(cast<Constant>(MV));            for (++i; i != e; ++i) -            Values.push_back(cast<Constant>(MapValue(*i, VM))); +            Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));            return VM[V] = ConstantStruct::get(CS->getType(), Values);          }        } @@ -80,12 +84,12 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {      } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {        std::vector<Constant*> Ops;        for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) -        Ops.push_back(cast<Constant>(MapValue(*i, VM))); +        Ops.push_back(cast<Constant>(MapValue(*i, VM, Context)));        return VM[V] = CE->getWithOperands(Ops);      } else if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {        for (User::op_iterator b = CP->op_begin(), i = b, e = CP->op_end();             i != e; ++i) { -        Value *MV = MapValue(*i, VM); +        Value *MV = 
MapValue(*i, VM, Context);          if (MV != *i) {            // This vector value must contain a reference to a global, make a new            // vector constant and return it. @@ -96,38 +100,16 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {              Values.push_back(cast<Constant>(*j));            Values.push_back(cast<Constant>(MV));            for (++i; i != e; ++i) -            Values.push_back(cast<Constant>(MapValue(*i, VM))); +            Values.push_back(cast<Constant>(MapValue(*i, VM, Context)));            return VM[V] = ConstantVector::get(Values);          }        }        return VM[V] = C; -    } else if (MDNode *N = dyn_cast<MDNode>(C)) { -      for (MDNode::const_elem_iterator b = N->elem_begin(), i = b, -             e = N->elem_end(); i != e; ++i) { -        if (!*i) continue; - -        Value *MV = MapValue(*i, VM); -        if (MV != *i) { -          // This MDNode must contain a reference to a global, make a new MDNode -          // and return it. -	  SmallVector<Value*, 8> Values; -          Values.reserve(N->getNumElements()); -          for (MDNode::const_elem_iterator j = b; j != i; ++j) -            Values.push_back(*j); -          Values.push_back(MV); -          for (++i; i != e; ++i) -            Values.push_back(MapValue(*i, VM)); -          return VM[V] = MDNode::get(Values.data(), Values.size()); -        } -      } -      return VM[V] = C; -      } else { -      assert(0 && "Unknown type of constant!"); +      llvm_unreachable("Unknown type of constant!");      }    } -    return 0;  } @@ -136,7 +118,7 @@ Value *llvm::MapValue(const Value *V, ValueMapTy &VM) {  ///  void llvm::RemapInstruction(Instruction *I, ValueMapTy &ValueMap) {    for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { -    Value *V = MapValue(*op, ValueMap); +    Value *V = MapValue(*op, ValueMap, I->getParent()->getContext());      assert(V && "Referenced value not in value map!");      *op = V;    }  | 
