| author | Dimitry Andric <dim@FreeBSD.org> | 2015-08-07 23:01:33 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2015-08-07 23:01:33 +0000 |
| commit | ee8648bdac07986a0f1ec897b02ec82a2f144d46 (patch) | |
| tree | 52d1861acda1205241ee35a94aa63129c604d469 /lib | |
| parent | 1a82d4c088707c791c792f6822f611b47a12bdfe (diff) | |
Diffstat (limited to 'lib')
412 files changed, 16888 insertions, 6713 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index ad0727a0e0e5..44d137dffd22 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -71,11 +71,6 @@ void AliasAnalysis::deleteValue(Value *V) { AA->deleteValue(V); } -void AliasAnalysis::copyValue(Value *From, Value *To) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - AA->copyValue(From, To); -} - void AliasAnalysis::addEscapingUse(Use &U) { assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); AA->addEscapingUse(U); diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp index 1ef49fc02fef..e5107b3bc827 100644 --- a/lib/Analysis/AliasDebugger.cpp +++ b/lib/Analysis/AliasDebugger.cpp @@ -124,10 +124,6 @@ namespace { assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); AliasAnalysis::deleteValue(V); } - void copyValue(Value *From, Value *To) override { - Vals.insert(To); - AliasAnalysis::copyValue(From, To); - } }; } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index bf8cda1ffaec..54d0f4304e1f 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -544,9 +544,6 @@ void AliasSetTracker::deleteValue(Value *PtrVal) { // the tracker already knows about a value, it will ignore the request. // void AliasSetTracker::copyValue(Value *From, Value *To) { - // Notify the alias analysis implementation that this value is copied. - AA.copyValue(From, To); - // First, look up the PointerRec for this pointer. PointerMapType::iterator I = PointerMap.find_as(From); if (I == PointerMap.end()) diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 8e812252fdfe..68f766edb301 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -685,6 +685,9 @@ BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { if (CS.onlyReadsMemory()) Min = OnlyReadsMemory; + if (CS.onlyAccessesArgMemory()) + Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees); + // The AliasAnalysis base class has some smarts, lets use them. 
return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); } @@ -710,6 +713,9 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) { if (F->onlyReadsMemory()) Min = OnlyReadsMemory; + if (F->onlyAccessesArgMemory()) + Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees); + const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); if (isMemsetPattern16(F, TLI)) diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 2f4c6a92f9af..02a5aef03223 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -1234,6 +1234,8 @@ bool llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::sqrt: + case Intrinsic::sin: + case Intrinsic::cos: case Intrinsic::pow: case Intrinsic::powi: case Intrinsic::bswap: @@ -1450,6 +1452,10 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFoldFP(floor, V, Ty); case Intrinsic::ceil: return ConstantFoldFP(ceil, V, Ty); + case Intrinsic::sin: + return ConstantFoldFP(sin, V, Ty); + case Intrinsic::cos: + return ConstantFoldFP(cos, V, Ty); } if (!TLI) diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index f1ddde252924..18d45dd6a396 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -42,94 +42,111 @@ STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); namespace { - /// FunctionRecord - One instance of this structure is stored for every - /// function in the program. Later, the entries for these functions are - /// removed if the function is found to call an external function (in which - /// case we know nothing about it. - struct FunctionRecord { - /// GlobalInfo - Maintain mod/ref info for all of the globals without - /// addresses taken that are read or written (transitively) by this - /// function. - std::map<const GlobalValue*, unsigned> GlobalInfo; - - /// MayReadAnyGlobal - May read global variables, but it is not known which. - bool MayReadAnyGlobal; - - unsigned getInfoForGlobal(const GlobalValue *GV) const { - unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; - std::map<const GlobalValue*, unsigned>::const_iterator I = +/// FunctionRecord - One instance of this structure is stored for every +/// function in the program. Later, the entries for these functions are +/// removed if the function is found to call an external function (in which +/// case we know nothing about it. +struct FunctionRecord { + /// GlobalInfo - Maintain mod/ref info for all of the globals without + /// addresses taken that are read or written (transitively) by this + /// function. + std::map<const GlobalValue *, unsigned> GlobalInfo; + + /// MayReadAnyGlobal - May read global variables, but it is not known which. + bool MayReadAnyGlobal; + + unsigned getInfoForGlobal(const GlobalValue *GV) const { + unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; + std::map<const GlobalValue *, unsigned>::const_iterator I = GlobalInfo.find(GV); - if (I != GlobalInfo.end()) - Effect |= I->second; - return Effect; - } + if (I != GlobalInfo.end()) + Effect |= I->second; + return Effect; + } - /// FunctionEffect - Capture whether or not this function reads or writes to - /// ANY memory. If not, we can do a lot of aggressive analysis on it. 
- unsigned FunctionEffect; + /// FunctionEffect - Capture whether or not this function reads or writes to + /// ANY memory. If not, we can do a lot of aggressive analysis on it. + unsigned FunctionEffect; - FunctionRecord() : MayReadAnyGlobal (false), FunctionEffect(0) {} - }; + FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {} +}; - /// GlobalsModRef - The actual analysis pass. - class GlobalsModRef : public ModulePass, public AliasAnalysis { - /// NonAddressTakenGlobals - The globals that do not have their addresses - /// taken. - std::set<const GlobalValue*> NonAddressTakenGlobals; +/// GlobalsModRef - The actual analysis pass. +class GlobalsModRef : public ModulePass, public AliasAnalysis { + /// NonAddressTakenGlobals - The globals that do not have their addresses + /// taken. + std::set<const GlobalValue *> NonAddressTakenGlobals; - /// IndirectGlobals - The memory pointed to by this global is known to be - /// 'owned' by the global. - std::set<const GlobalValue*> IndirectGlobals; + /// IndirectGlobals - The memory pointed to by this global is known to be + /// 'owned' by the global. + std::set<const GlobalValue *> IndirectGlobals; - /// AllocsForIndirectGlobals - If an instruction allocates memory for an - /// indirect global, this map indicates which one. - std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals; + /// AllocsForIndirectGlobals - If an instruction allocates memory for an + /// indirect global, this map indicates which one. + std::map<const Value *, const GlobalValue *> AllocsForIndirectGlobals; - /// FunctionInfo - For each function, keep track of what globals are - /// modified or read. - std::map<const Function*, FunctionRecord> FunctionInfo; + /// FunctionInfo - For each function, keep track of what globals are + /// modified or read. + std::map<const Function *, FunctionRecord> FunctionInfo; - public: - static char ID; - GlobalsModRef() : ModulePass(ID) { - initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); - } +public: + static char ID; + GlobalsModRef() : ModulePass(ID) { + initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); + } - bool runOnModule(Module &M) override { - InitializeAliasAnalysis(this, &M.getDataLayout()); + bool runOnModule(Module &M) override { + InitializeAliasAnalysis(this, &M.getDataLayout()); - // Find non-addr taken globals. - AnalyzeGlobals(M); + // Find non-addr taken globals. + AnalyzeGlobals(M); - // Propagate on CG. - AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M); - return false; - } + // Propagate on CG. 
+ AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M); + return false; + } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired<CallGraphWrapperPass>(); - AU.setPreservesAll(); // Does not transform code - } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AliasAnalysis::getAnalysisUsage(AU); + AU.addRequired<CallGraphWrapperPass>(); + AU.setPreservesAll(); // Does not transform code + } + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) override; + ModRefResult getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) override; + ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) override { + return AliasAnalysis::getModRefInfo(CS1, CS2); + } - //------------------------------------------------ - // Implement the AliasAnalysis API - // - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return AliasAnalysis::getModRefInfo(CS1, CS2); + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + ModRefBehavior getModRefBehavior(const Function *F) override { + ModRefBehavior Min = UnknownModRefBehavior; + + if (FunctionRecord *FR = getFunctionInfo(F)) { + if (FR->FunctionEffect == 0) + Min = DoesNotAccessMemory; + else if ((FR->FunctionEffect & Mod) == 0) + Min = OnlyReadsMemory; } - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(const Function *F) override { - ModRefBehavior Min = UnknownModRefBehavior; + return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); + } + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { + ModRefBehavior Min = UnknownModRefBehavior; + if (const Function *F = CS.getCalledFunction()) if (FunctionRecord *FR = getFunctionInfo(F)) { if (FR->FunctionEffect == 0) Min = DoesNotAccessMemory; @@ -137,68 +154,50 @@ namespace { Min = OnlyReadsMemory; } - return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); - } - - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. 
- ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { - ModRefBehavior Min = UnknownModRefBehavior; - - if (const Function* F = CS.getCalledFunction()) - if (FunctionRecord *FR = getFunctionInfo(F)) { - if (FR->FunctionEffect == 0) - Min = DoesNotAccessMemory; - else if ((FR->FunctionEffect & Mod) == 0) - Min = OnlyReadsMemory; - } + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); + } - return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); - } + void deleteValue(Value *V) override; + void addEscapingUse(Use &U) override; + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + void *getAdjustedAnalysisPointer(AnalysisID PI) override { + if (PI == &AliasAnalysis::ID) + return (AliasAnalysis *)this; + return this; + } - void deleteValue(Value *V) override; - void copyValue(Value *From, Value *To) override; - void addEscapingUse(Use &U) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - private: - /// getFunctionInfo - Return the function info for the function, or null if - /// we don't have anything useful to say about it. - FunctionRecord *getFunctionInfo(const Function *F) { - std::map<const Function*, FunctionRecord>::iterator I = +private: + /// getFunctionInfo - Return the function info for the function, or null if + /// we don't have anything useful to say about it. 
+ FunctionRecord *getFunctionInfo(const Function *F) { + std::map<const Function *, FunctionRecord>::iterator I = FunctionInfo.find(F); - if (I != FunctionInfo.end()) - return &I->second; - return nullptr; - } + if (I != FunctionInfo.end()) + return &I->second; + return nullptr; + } - void AnalyzeGlobals(Module &M); - void AnalyzeCallGraph(CallGraph &CG, Module &M); - bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers, - std::vector<Function*> &Writers, - GlobalValue *OkayStoreDest = nullptr); - bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); - }; + void AnalyzeGlobals(Module &M); + void AnalyzeCallGraph(CallGraph &CG, Module &M); + bool AnalyzeUsesOfPointer(Value *V, std::vector<Function *> &Readers, + std::vector<Function *> &Writers, + GlobalValue *OkayStoreDest = nullptr); + bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); +}; } char GlobalsModRef::ID = 0; -INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, - "globalsmodref-aa", "Simple mod/ref analysis for globals", - false, true, false) +INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", + "Simple mod/ref analysis for globals", false, true, + false) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, - "globalsmodref-aa", "Simple mod/ref analysis for globals", - false, true, false) +INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", + "Simple mod/ref analysis for globals", false, true, + false) Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } @@ -207,7 +206,7 @@ Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } /// (really, their address passed to something nontrivial), record this fact, /// and record the functions that they are used directly in. void GlobalsModRef::AnalyzeGlobals(Module &M) { - std::vector<Function*> Readers, Writers; + std::vector<Function *> Readers, Writers; for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasLocalLinkage()) { if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { @@ -215,11 +214,12 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { NonAddressTakenGlobals.insert(I); ++NumNonAddrTakenFunctions; } - Readers.clear(); Writers.clear(); + Readers.clear(); + Writers.clear(); } - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; + ++I) if (I->hasLocalLinkage()) { if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { // Remember that we are tracking this global, and the mod/ref fns @@ -228,7 +228,7 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { for (unsigned i = 0, e = Readers.size(); i != e; ++i) FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; - if (!I->isConstant()) // No need to keep track of writers to constants + if (!I->isConstant()) // No need to keep track of writers to constants for (unsigned i = 0, e = Writers.size(); i != e; ++i) FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; ++NumNonAddrTakenGlobalVars; @@ -238,7 +238,8 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { AnalyzeIndirectGlobalMemory(I)) ++NumIndirectGlobalVars; } - Readers.clear(); Writers.clear(); + Readers.clear(); + Writers.clear(); } } @@ -249,10 +250,11 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) { /// /// If OkayStoreDest is non-null, stores into this global are allowed. 
bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, - std::vector<Function*> &Readers, - std::vector<Function*> &Writers, + std::vector<Function *> &Readers, + std::vector<Function *> &Writers, GlobalValue *OkayStoreDest) { - if (!V->getType()->isPointerTy()) return true; + if (!V->getType()->isPointerTy()) + return true; for (Use &U : V->uses()) { User *I = U.getUser(); @@ -262,7 +264,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, if (V == SI->getOperand(1)) { Writers.push_back(SI->getParent()->getParent()); } else if (SI->getOperand(1) != OkayStoreDest) { - return true; // Storing the pointer + return true; // Storing the pointer } } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) { if (AnalyzeUsesOfPointer(I, Readers, Writers)) @@ -282,7 +284,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { if (!isa<ConstantPointerNull>(ICI->getOperand(1))) - return true; // Allow comparison against null. + return true; // Allow comparison against null. } else { return true; } @@ -301,7 +303,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Keep track of values related to the allocation of the memory, f.e. the // value produced by the malloc call and any casts. - std::vector<Value*> AllocRelatedValues; + std::vector<Value *> AllocRelatedValues; // Walk the user list of the global. If we find anything other than a direct // load or store, bail out. @@ -310,13 +312,14 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // The pointer loaded from the global can only be used in simple ways: // we allow addressing of it and loading storing to it. We do *not* allow // storing the loaded pointer somewhere else or passing to a function. - std::vector<Function*> ReadersWriters; + std::vector<Function *> ReadersWriters; if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters)) - return false; // Loaded pointer escapes. + return false; // Loaded pointer escapes. // TODO: Could try some IP mod/ref of the loaded pointer. } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { // Storing the global itself. - if (SI->getOperand(0) == GV) return false; + if (SI->getOperand(0) == GV) + return false; // If storing the null pointer, ignore it. if (isa<ConstantPointerNull>(SI->getOperand(0))) @@ -327,13 +330,13 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { GV->getParent()->getDataLayout()); if (!isAllocLikeFn(Ptr, TLI)) - return false; // Too hard to analyze. + return false; // Too hard to analyze. // Analyze all uses of the allocation. If any of them are used in a // non-simple way (e.g. stored to another global) bail out. - std::vector<Function*> ReadersWriters; + std::vector<Function *> ReadersWriters; if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) - return false; // Loaded pointer escapes. + return false; // Loaded pointer escapes. // Remember that this allocation is related to the indirect global. AllocRelatedValues.push_back(Ptr); @@ -360,7 +363,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We do a bottom-up SCC traversal of the call graph. In other words, we // visit all callees before callers (leaf-first). 
- for (scc_iterator<CallGraph*> I = scc_begin(&CG); !I.isAtEnd(); ++I) { + for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { const std::vector<CallGraphNode *> &SCC = *I; assert(!SCC.empty() && "SCC with no functions?"); @@ -437,9 +440,10 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { } // Scan the function bodies for explicit loads or stores. - for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i) + for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef; + ++i) for (inst_iterator II = inst_begin(SCC[i]->getFunction()), - E = inst_end(SCC[i]->getFunction()); + E = inst_end(SCC[i]->getFunction()); II != E && FunctionEffect != ModRef; ++II) if (LoadInst *LI = dyn_cast<LoadInst>(&*II)) { FunctionEffect |= Ref; @@ -474,8 +478,6 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { } } - - /// alias - If one of the pointers is to a global that we are tracking, and the /// other is some random pointer, we know there cannot be an alias, because the /// address of the global isn't taken. @@ -492,8 +494,10 @@ AliasResult GlobalsModRef::alias(const MemoryLocation &LocA, if (GV1 || GV2) { // If the global's address is taken, pretend we don't know it's a pointer to // the global. - if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = nullptr; - if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = nullptr; + if (GV1 && !NonAddressTakenGlobals.count(GV1)) + GV1 = nullptr; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) + GV2 = nullptr; // If the two pointers are derived from two different non-addr-taken // globals, or if one is and the other isn't, we know these can't alias. @@ -554,7 +558,6 @@ GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc)); } - //===----------------------------------------------------------------------===// // Methods to update the analysis as a result of the client transformation. // @@ -565,9 +568,10 @@ void GlobalsModRef::deleteValue(Value *V) { // any AllocRelatedValues for it. if (IndirectGlobals.erase(GV)) { // Remove any entries in AllocsForIndirectGlobals for this global. - for (std::map<const Value*, const GlobalValue*>::iterator - I = AllocsForIndirectGlobals.begin(), - E = AllocsForIndirectGlobals.end(); I != E; ) { + for (std::map<const Value *, const GlobalValue *>::iterator + I = AllocsForIndirectGlobals.begin(), + E = AllocsForIndirectGlobals.end(); + I != E;) { if (I->second == GV) { AllocsForIndirectGlobals.erase(I++); } else { @@ -585,16 +589,12 @@ void GlobalsModRef::deleteValue(Value *V) { AliasAnalysis::deleteValue(V); } -void GlobalsModRef::copyValue(Value *From, Value *To) { - AliasAnalysis::copyValue(From, To); -} - void GlobalsModRef::addEscapingUse(Use &U) { // For the purposes of this analysis, it is conservatively correct to treat // a newly escaping value equivalently to a deleted one. We could perhaps // be more precise by processing the new use and attempting to update our // saved analysis results to accommodate it. deleteValue(U); - + AliasAnalysis::addEscapingUse(U); } diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 349b9cac2c2d..c0d2e375cb04 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -783,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { case Intrinsic::memmove: // SROA can usually chew through these intrinsics, but they aren't free. 
return false; - case Intrinsic::frameescape: + case Intrinsic::localescape: HasFrameEscape = true; return false; } @@ -1424,11 +1424,11 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { cast<CallInst>(CS.getInstruction())->canReturnTwice()) return false; - // Disallow inlining functions that call @llvm.frameescape. Doing this + // Disallow inlining functions that call @llvm.localescape. Doing this // correctly would require major changes to the inliner. if (CS.getCalledFunction() && CS.getCalledFunction()->getIntrinsicID() == - llvm::Intrinsic::frameescape) + llvm::Intrinsic::localescape) return false; } } diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index b88b2496b875..926787d3be91 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -12,8 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/IVUsers.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" @@ -34,6 +36,7 @@ using namespace llvm; char IVUsers::ID = 0; INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", "Induction Variable Users", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) @@ -137,6 +140,11 @@ bool IVUsers::AddUsersImpl(Instruction *I, if (Width > 64 || !DL.isLegalInteger(Width)) return false; + // Don't attempt to promote ephemeral values to indvars. They will be removed + // later anyway. + if (EphValues.count(I)) + return false; + // Get the symbolic expression for this instruction. const SCEV *ISE = SE->getSCEV(I); @@ -244,6 +252,7 @@ IVUsers::IVUsers() } void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolution>(); @@ -253,10 +262,16 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { L = l; + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache( + *L->getHeader()->getParent()); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = &getAnalysis<ScalarEvolution>(); + // Collect ephemeral values so that AddUsersIfInteresting skips them. + EphValues.clear(); + CodeMetrics::collectEphemeralValues(L, AC, EphValues); + // Find all uses of induction variables in this loop, and categorize // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. 
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 12e406bb1a2d..fa42b48b6cdb 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -3046,7 +3047,8 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const Query &Q, unsigned MaxRecurse) { + FastMathFlags FMF, const Query &Q, + unsigned MaxRecurse) { CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate; assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!"); @@ -3065,6 +3067,14 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Pred == FCmpInst::FCMP_TRUE) return ConstantInt::get(GetCompareTy(LHS), 1); + // UNO/ORD predicates can be trivially folded if NaNs are ignored. + if (FMF.noNaNs()) { + if (Pred == FCmpInst::FCMP_UNO) + return ConstantInt::get(GetCompareTy(LHS), 0); + if (Pred == FCmpInst::FCMP_ORD) + return ConstantInt::get(GetCompareTy(LHS), 1); + } + // fcmp pred x, undef and fcmp pred undef, x // fold to true if unordered, false if ordered if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) { @@ -3151,12 +3161,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const DataLayout &DL, + FastMathFlags FMF, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { - return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), - RecursionLimit); + return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, + Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is @@ -3511,6 +3521,82 @@ Value *llvm::SimplifyInsertValueInst( RecursionLimit); } +/// SimplifyExtractValueInst - Given operands for an ExtractValueInst, see if we +/// can fold the result. If not, this returns null. 
+static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, + const Query &, unsigned) { + if (auto *CAgg = dyn_cast<Constant>(Agg)) + return ConstantFoldExtractValueInstruction(CAgg, Idxs); + + // extractvalue x, (insertvalue y, elt, n), n -> elt + unsigned NumIdxs = Idxs.size(); + for (auto *IVI = dyn_cast<InsertValueInst>(Agg); IVI != nullptr; + IVI = dyn_cast<InsertValueInst>(IVI->getAggregateOperand())) { + ArrayRef<unsigned> InsertValueIdxs = IVI->getIndices(); + unsigned NumInsertValueIdxs = InsertValueIdxs.size(); + unsigned NumCommonIdxs = std::min(NumInsertValueIdxs, NumIdxs); + if (InsertValueIdxs.slice(0, NumCommonIdxs) == + Idxs.slice(0, NumCommonIdxs)) { + if (NumIdxs == NumInsertValueIdxs) + return IVI->getInsertedValueOperand(); + break; + } + } + + return nullptr; +} + +Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs, + const DataLayout &DL, + const TargetLibraryInfo *TLI, + const DominatorTree *DT, + AssumptionCache *AC, + const Instruction *CxtI) { + return ::SimplifyExtractValueInst(Agg, Idxs, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); +} + +/// SimplifyExtractElementInst - Given operands for an ExtractElementInst, see if we +/// can fold the result. If not, this returns null. +static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &, + unsigned) { + if (auto *CVec = dyn_cast<Constant>(Vec)) { + if (auto *CIdx = dyn_cast<Constant>(Idx)) + return ConstantFoldExtractElementInstruction(CVec, CIdx); + + // The index is not relevant if our vector is a splat. + if (auto *Splat = CVec->getSplatValue()) + return Splat; + + if (isa<UndefValue>(Vec)) + return UndefValue::get(Vec->getType()->getVectorElementType()); + } + + // If extracting a specified index from the vector, see if we can recursively + // find a previously computed scalar that was inserted into the vector. + if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) { + unsigned IndexVal = IdxC->getZExtValue(); + unsigned VectorWidth = Vec->getType()->getVectorNumElements(); + + // If this is extracting an invalid index, turn this into undef, to avoid + // crashing the code below. + if (IndexVal >= VectorWidth) + return UndefValue::get(Vec->getType()->getVectorElementType()); + + if (Value *Elt = findScalarElement(Vec, IndexVal)) + return Elt; + } + + return nullptr; +} + +Value *llvm::SimplifyExtractElementInst( + Value *Vec, Value *Idx, const DataLayout &DL, const TargetLibraryInfo *TLI, + const DominatorTree *DT, AssumptionCache *AC, const Instruction *CxtI) { + return ::SimplifyExtractElementInst(Vec, Idx, Query(DL, TLI, DT, AC, CxtI), + RecursionLimit); +} + /// SimplifyPHINode - See if we can fold the given phi. If not, returns null. 
static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node @@ -3670,7 +3756,7 @@ static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate)) return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse); - return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse); + return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse); } Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -3900,9 +3986,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, I->getOperand(1), DL, TLI, DT, AC, I); break; case Instruction::FCmp: - Result = - SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), I->getOperand(0), - I->getOperand(1), DL, TLI, DT, AC, I); + Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), + I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), DL, TLI, DT, AC, I); break; case Instruction::Select: Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1), @@ -3920,6 +4006,18 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, IV->getIndices(), DL, TLI, DT, AC, I); break; } + case Instruction::ExtractValue: { + auto *EVI = cast<ExtractValueInst>(I); + Result = SimplifyExtractValueInst(EVI->getAggregateOperand(), + EVI->getIndices(), DL, TLI, DT, AC, I); + break; + } + case Instruction::ExtractElement: { + auto *EEI = cast<ExtractElementInst>(I); + Result = SimplifyExtractElementInst( + EEI->getVectorOperand(), EEI->getIndexOperand(), DL, TLI, DT, AC, I); + break; + } case Instruction::PHI: Result = SimplifyPHINode(cast<PHINode>(I), Query(DL, TLI, DT, AC, I)); break; diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index b11cd7e84a6d..becbae4c5b50 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -48,6 +48,13 @@ static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold( cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8)); unsigned VectorizerParams::RuntimeMemoryCheckThreshold; +/// \brief The maximum iterations used to merge memory checks +static cl::opt<unsigned> MemoryCheckMergeThreshold( + "memory-check-merge-threshold", cl::Hidden, + cl::desc("Maximum number of comparisons done when trying to merge " + "runtime memory checks. (default = 100)"), + cl::init(100)); + /// Maximum SIMD width. const unsigned VectorizerParams::MaxVectorWidth = 64; @@ -112,35 +119,182 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, return SE->getSCEV(Ptr); } -void LoopAccessInfo::RuntimePointerCheck::insert( - ScalarEvolution *SE, Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, - unsigned ASId, const ValueToValueMap &Strides) { +void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, + unsigned DepSetId, unsigned ASId, + const ValueToValueMap &Strides) { // Get the stride replaced scev. 
const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); assert(AR && "Invalid addrec expression"); const SCEV *Ex = SE->getBackedgeTakenCount(Lp); const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); - Pointers.push_back(Ptr); - Starts.push_back(AR->getStart()); - Ends.push_back(ScEnd); - IsWritePtr.push_back(WritePtr); - DependencySetId.push_back(DepSetId); - AliasSetId.push_back(ASId); + Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId, + Sc); +} + +bool RuntimePointerChecking::needsChecking( + const CheckingPtrGroup &M, const CheckingPtrGroup &N, + const SmallVectorImpl<int> *PtrPartition) const { + for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I) + for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) + if (needsChecking(M.Members[I], N.Members[J], PtrPartition)) + return true; + return false; +} + +/// Compare \p I and \p J and return the minimum. +/// Return nullptr in case we couldn't find an answer. +static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J, + ScalarEvolution *SE) { + const SCEV *Diff = SE->getMinusSCEV(J, I); + const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff); + + if (!C) + return nullptr; + if (C->getValue()->isNegative()) + return J; + return I; +} + +bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) { + const SCEV *Start = RtCheck.Pointers[Index].Start; + const SCEV *End = RtCheck.Pointers[Index].End; + + // Compare the starts and ends with the known minimum and maximum + // of this set. We need to know how we compare against the min/max + // of the set in order to be able to emit memchecks. + const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE); + if (!Min0) + return false; + + const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE); + if (!Min1) + return false; + + // Update the low bound expression if we've found a new min value. + if (Min0 == Start) + Low = Start; + + // Update the high bound expression if we've found a new max value. + if (Min1 != End) + High = End; + + Members.push_back(Index); + return true; } -bool LoopAccessInfo::RuntimePointerCheck::needsChecking( +void RuntimePointerChecking::groupChecks( + MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { + // We build the groups from dependency candidates equivalence classes + // because: + // - We know that pointers in the same equivalence class share + // the same underlying object and therefore there is a chance + // that we can compare pointers + // - We wouldn't be able to merge two pointers for which we need + // to emit a memcheck. The classes in DepCands are already + // conveniently built such that no two pointers in the same + // class need checking against each other. + + // We use the following (greedy) algorithm to construct the groups + // For every pointer in the equivalence class: + // For each existing group: + // - if the difference between this pointer and the min/max bounds + // of the group is a constant, then make the pointer part of the + // group and update the min/max bounds of that group as required. + + CheckingGroups.clear(); + + // If we don't have the dependency partitions, construct a new + // checking pointer group for each pointer. 
+ if (!UseDependencies) { + for (unsigned I = 0; I < Pointers.size(); ++I) + CheckingGroups.push_back(CheckingPtrGroup(I, *this)); + return; + } + + unsigned TotalComparisons = 0; + + DenseMap<Value *, unsigned> PositionMap; + for (unsigned Index = 0; Index < Pointers.size(); ++Index) + PositionMap[Pointers[Index].PointerValue] = Index; + + // We need to keep track of what pointers we've already seen so we + // don't process them twice. + SmallSet<unsigned, 2> Seen; + + // Go through all equivalence classes, get the the "pointer check groups" + // and add them to the overall solution. We use the order in which accesses + // appear in 'Pointers' to enforce determinism. + for (unsigned I = 0; I < Pointers.size(); ++I) { + // We've seen this pointer before, and therefore already processed + // its equivalence class. + if (Seen.count(I)) + continue; + + MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue, + Pointers[I].IsWritePtr); + + SmallVector<CheckingPtrGroup, 2> Groups; + auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access)); + + // Because DepCands is constructed by visiting accesses in the order in + // which they appear in alias sets (which is deterministic) and the + // iteration order within an equivalence class member is only dependent on + // the order in which unions and insertions are performed on the + // equivalence class, the iteration order is deterministic. + for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end(); + MI != ME; ++MI) { + unsigned Pointer = PositionMap[MI->getPointer()]; + bool Merged = false; + // Mark this pointer as seen. + Seen.insert(Pointer); + + // Go through all the existing sets and see if we can find one + // which can include this pointer. + for (CheckingPtrGroup &Group : Groups) { + // Don't perform more than a certain amount of comparisons. + // This should limit the cost of grouping the pointers to something + // reasonable. If we do end up hitting this threshold, the algorithm + // will create separate groups for all remaining pointers. + if (TotalComparisons > MemoryCheckMergeThreshold) + break; + + TotalComparisons++; + + if (Group.addPointer(Pointer)) { + Merged = true; + break; + } + } + + if (!Merged) + // We couldn't add this pointer to any existing set or the threshold + // for the number of comparisons has been reached. Create a new group + // to hold the current pointer. + Groups.push_back(CheckingPtrGroup(Pointer, *this)); + } + + // We've computed the grouped checks for this partition. + // Save the results and continue with the next one. + std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups)); + } +} + +bool RuntimePointerChecking::needsChecking( unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const { + const PointerInfo &PointerI = Pointers[I]; + const PointerInfo &PointerJ = Pointers[J]; + // No need to check if two readonly pointers intersect. - if (!IsWritePtr[I] && !IsWritePtr[J]) + if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr) return false; // Only need to check pointers between two different dependency sets. - if (DependencySetId[I] == DependencySetId[J]) + if (PointerI.DependencySetId == PointerJ.DependencySetId) return false; // Only need to check pointers in the same alias set. - if (AliasSetId[I] != AliasSetId[J]) + if (PointerI.AliasSetId != PointerJ.AliasSetId) return false; // If PtrPartition is set omit checks between pointers of the same partition. 
@@ -153,45 +307,75 @@ bool LoopAccessInfo::RuntimePointerCheck::needsChecking( return true; } -void LoopAccessInfo::RuntimePointerCheck::print( +void RuntimePointerChecking::print( raw_ostream &OS, unsigned Depth, const SmallVectorImpl<int> *PtrPartition) const { - unsigned NumPointers = Pointers.size(); - if (NumPointers == 0) - return; OS.indent(Depth) << "Run-time memory checks:\n"; + unsigned N = 0; - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) { - OS.indent(Depth) << N++ << ":\n"; - OS.indent(Depth + 2) << *Pointers[I]; - if (PtrPartition) - OS << " (Partition: " << (*PtrPartition)[I] << ")"; - OS << "\n"; - OS.indent(Depth + 2) << *Pointers[J]; - if (PtrPartition) - OS << " (Partition: " << (*PtrPartition)[J] << ")"; - OS << "\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) + for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) + if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) { + OS.indent(Depth) << "Check " << N++ << ":\n"; + OS.indent(Depth + 2) << "Comparing group " << I << ":\n"; + + for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) { + OS.indent(Depth + 2) + << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n"; + if (PtrPartition) + OS << " (Partition: " + << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")" + << "\n"; + } + + OS.indent(Depth + 2) << "Against group " << J << ":\n"; + + for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) { + OS.indent(Depth + 2) + << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n"; + if (PtrPartition) + OS << " (Partition: " + << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")" + << "\n"; + } } + + OS.indent(Depth) << "Grouped accesses:\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + OS.indent(Depth + 2) << "Group " << I << ":\n"; + OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low + << " High: " << *CheckingGroups[I].High << ")\n"; + for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) { + OS.indent(Depth + 6) << "Member: " + << *Pointers[CheckingGroups[I].Members[J]].Expr + << "\n"; + } + } } -unsigned LoopAccessInfo::RuntimePointerCheck::getNumberOfChecks( +unsigned RuntimePointerChecking::getNumberOfChecks( const SmallVectorImpl<int> *PtrPartition) const { - unsigned NumPointers = Pointers.size(); + + unsigned NumPartitions = CheckingGroups.size(); unsigned CheckCount = 0; - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) + for (unsigned I = 0; I < NumPartitions; ++I) + for (unsigned J = I + 1; J < NumPartitions; ++J) + if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) CheckCount++; return CheckCount; } -bool LoopAccessInfo::RuntimePointerCheck::needsAnyChecking( +bool RuntimePointerChecking::needsAnyChecking( const SmallVectorImpl<int> *PtrPartition) const { - return getNumberOfChecks(PtrPartition) != 0; + unsigned NumPointers = Pointers.size(); + + for (unsigned I = 0; I < NumPointers; ++I) + for (unsigned J = I + 1; J < NumPointers; ++J) + if (needsChecking(I, J, PtrPartition)) + return true; + return false; } namespace { @@ -207,7 +391,8 @@ public: AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA) - : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckNeeded(false) {} + : DL(Dl), AST(*AA), LI(LI), DepCands(DA), + IsRTCheckAnalysisNeeded(false) {} /// \brief 
Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -226,11 +411,12 @@ public: } /// \brief Check whether we can check the pointers at runtime for - /// non-intersection. Returns true when we have 0 pointers - /// (a check on 0 pointers for non-intersection will always return true). - bool canCheckPtrAtRT(LoopAccessInfo::RuntimePointerCheck &RtCheck, - bool &NeedRTCheck, ScalarEvolution *SE, Loop *TheLoop, - const ValueToValueMap &Strides, + /// non-intersection. + /// + /// Returns true if we need no check or if we do and we can generate them + /// (i.e. the pointers have computable bounds). + bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, + Loop *TheLoop, const ValueToValueMap &Strides, bool ShouldCheckStride = false); /// \brief Goes over all memory accesses, checks whether a RT check is needed @@ -239,8 +425,11 @@ public: processMemAccesses(); } - bool isRTCheckNeeded() { return IsRTCheckNeeded; } - + /// \brief Initial processing of memory accesses determined that we need to + /// perform dependency checking. + /// + /// Note that this can later be cleared if we retry memcheck analysis without + /// dependency checking (i.e. ShouldRetryWithRuntimeCheck). bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } /// We decided that no dependence analysis would be used. Reset the state. @@ -255,7 +444,7 @@ private: typedef SetVector<MemAccessInfo> PtrAccessSet; /// \brief Go over all memory access and check whether runtime pointer checks - /// are needed /// and build sets of dependency check candidates. + /// are needed and build sets of dependency check candidates. void processMemAccesses(); /// Set of all accesses. @@ -280,7 +469,14 @@ private: /// dependence check. MemoryDepChecker::DepCandidates &DepCands; - bool IsRTCheckNeeded; + /// \brief Initial processing of memory accesses determined that we may need + /// to add memchecks. Perform the analysis to determine the necessary checks. + /// + /// Note that, this is different from isDependencyCheckNeeded. When we retry + /// memcheck analysis without dependency checking + /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared + /// while this remains set if we have potentially dependent accesses. + bool IsRTCheckAnalysisNeeded; }; } // end anonymous namespace @@ -296,16 +492,16 @@ static bool hasComputableBounds(ScalarEvolution *SE, return AR->isAffine(); } -bool AccessAnalysis::canCheckPtrAtRT( - LoopAccessInfo::RuntimePointerCheck &RtCheck, bool &NeedRTCheck, - ScalarEvolution *SE, Loop *TheLoop, const ValueToValueMap &StridesMap, - bool ShouldCheckStride) { +bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, + ScalarEvolution *SE, Loop *TheLoop, + const ValueToValueMap &StridesMap, + bool ShouldCheckStride) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRT = true; - NeedRTCheck = false; - if (!IsRTCheckNeeded) return true; + bool NeedRTCheck = false; + if (!IsRTCheckAnalysisNeeded) return true; bool IsDepCheckNeeded = isDependencyCheckNeeded(); @@ -313,6 +509,9 @@ bool AccessAnalysis::canCheckPtrAtRT( // Accesses between different groups doesn't need to be checked. unsigned ASId = 1; for (auto &AS : AST) { + int NumReadPtrChecks = 0; + int NumWritePtrChecks = 0; + // We assign consecutive id to access from different dependence sets. // Accesses within the same set don't need a runtime check. 
unsigned RunningDepId = 1; @@ -323,6 +522,11 @@ bool AccessAnalysis::canCheckPtrAtRT( bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); MemAccessInfo Access(Ptr, IsWrite); + if (IsWrite) + ++NumWritePtrChecks; + else + ++NumReadPtrChecks; + if (hasComputableBounds(SE, StridesMap, Ptr) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. @@ -341,7 +545,7 @@ bool AccessAnalysis::canCheckPtrAtRT( // Each access has its own dependence set. DepId = RunningDepId++; - RtCheck.insert(SE, TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); } else { @@ -350,15 +554,21 @@ bool AccessAnalysis::canCheckPtrAtRT( } } + // If we have at least two writes or one write and a read then we need to + // check them. But there is no need to checks if there is only one + // dependence set for this alias set. + // + // Note that this function computes CanDoRT and NeedRTCheck independently. + // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer + // for which we couldn't find the bounds but we don't actually need to emit + // any checks so it does not matter. + if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2)) + NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 && + NumWritePtrChecks >= 1)); + ++ASId; } - // We need a runtime check if there are any accesses that need checking. - // However, some accesses cannot be checked (for example because we - // can't determine their bounds). In these cases we would need a check - // but wouldn't be able to add it. - NeedRTCheck = !CanDoRT || RtCheck.needsAnyChecking(nullptr); - // If the pointers that we would use for the bounds comparison have different // address spaces, assume the values aren't directly comparable, so we can't // use them for the runtime check. We also have to assume they could @@ -368,14 +578,15 @@ bool AccessAnalysis::canCheckPtrAtRT( for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i + 1; j < NumPointers; ++j) { // Only need to check pointers between two different dependency sets. - if (RtCheck.DependencySetId[i] == RtCheck.DependencySetId[j]) + if (RtCheck.Pointers[i].DependencySetId == + RtCheck.Pointers[j].DependencySetId) continue; // Only need to check pointers in the same alias set. - if (RtCheck.AliasSetId[i] != RtCheck.AliasSetId[j]) + if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId) continue; - Value *PtrI = RtCheck.Pointers[i]; - Value *PtrJ = RtCheck.Pointers[j]; + Value *PtrI = RtCheck.Pointers[i].PointerValue; + Value *PtrJ = RtCheck.Pointers[j].PointerValue; unsigned ASi = PtrI->getType()->getPointerAddressSpace(); unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); @@ -387,7 +598,18 @@ bool AccessAnalysis::canCheckPtrAtRT( } } - return CanDoRT; + if (NeedRTCheck && CanDoRT) + RtCheck.groupChecks(DepCands, IsDepCheckNeeded); + + DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr) + << " pointer comparisons.\n"); + + RtCheck.Need = NeedRTCheck; + + bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT; + if (!CanDoRTIfNeeded) + RtCheck.reset(); + return CanDoRTIfNeeded; } void AccessAnalysis::processMemAccesses() { @@ -470,7 +692,7 @@ void AccessAnalysis::processMemAccesses() { // catch "a[i] = a[i] + " without having to do a dependence check). 
if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { CheckDeps.insert(Access); - IsRTCheckNeeded = true; + IsRTCheckAnalysisNeeded = true; } if (IsWrite) @@ -600,7 +822,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, // Check the step is constant. const SCEV *Step = AR->getStepRecurrence(*SE); - // Calculate the pointer stride and check if it is consecutive. + // Calculate the pointer stride and check if it is constant. const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); if (!C) { DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr << @@ -805,11 +1027,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " << *InstMap[BIdx] << ": " << *Dist << "\n"); - // Need consecutive accesses. We don't want to vectorize + // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in // the address space. if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ - DEBUG(dbgs() << "Non-consecutive pointer access\n"); + DEBUG(dbgs() << "Pointer access with non-constant stride\n"); return Dependence::Unknown; } @@ -859,8 +1081,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned Stride = std::abs(StrideAPtr); if (Stride > 1 && - areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) + areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) { + DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); return Dependence::NoDep; + } // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? @@ -1098,8 +1322,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { unsigned NumReads = 0; unsigned NumReadWrites = 0; - PtrRtCheck.Pointers.clear(); - PtrRtCheck.Need = false; + PtrRtChecking.Pointers.clear(); + PtrRtChecking.Need = false; const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); @@ -1258,28 +1482,17 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. - bool NeedRTCheck; - bool CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, - NeedRTCheck, SE, - TheLoop, Strides); - - DEBUG(dbgs() << "LAA: We need to do " - << PtrRtCheck.getNumberOfChecks(nullptr) - << " pointer comparisons.\n"); - - // Check that we found the bounds for the pointer. - if (CanDoRT) - DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); - else if (NeedRTCheck) { + bool CanDoRTIfNeeded = + Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); + if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); - DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << - "the array bounds.\n"); - PtrRtCheck.reset(); + DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " + << "the array bounds.\n"); CanVecMem = false; return; } - PtrRtCheck.Need = NeedRTCheck; + DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n"); CanVecMem = true; if (Accesses.isDependencyCheckNeeded()) { @@ -1290,23 +1503,21 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) { DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); - NeedRTCheck = true; // Clear the dependency checks. 
We assume they are not needed. Accesses.resetDepChecks(DepChecker); - PtrRtCheck.reset(); - PtrRtCheck.Need = true; + PtrRtChecking.reset(); + PtrRtChecking.Need = true; - CanDoRT = Accesses.canCheckPtrAtRT(PtrRtCheck, NeedRTCheck, SE, - TheLoop, Strides, true); + CanDoRTIfNeeded = + Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); // Check that we found the bounds for the pointer. - if (NeedRTCheck && !CanDoRT) { + if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot check memory dependencies at runtime"); DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); - PtrRtCheck.reset(); CanVecMem = false; return; } @@ -1317,8 +1528,8 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (CanVecMem) DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We" - << (NeedRTCheck ? "" : " don't") - << " need a runtime memory check.\n"); + << (PtrRtChecking.Need ? "" : " don't") + << " need runtime memory checks.\n"); else { emitAnalysis(LoopAccessReport() << "unsafe dependent memory operations in loop"); @@ -1357,35 +1568,38 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const { - if (!PtrRtCheck.Need) + if (!PtrRtChecking.Need) return std::make_pair(nullptr, nullptr); - unsigned NumPointers = PtrRtCheck.Pointers.size(); - SmallVector<TrackingVH<Value> , 2> Starts; - SmallVector<TrackingVH<Value> , 2> Ends; + SmallVector<TrackingVH<Value>, 2> Starts; + SmallVector<TrackingVH<Value>, 2> Ends; LLVMContext &Ctx = Loc->getContext(); SCEVExpander Exp(*SE, DL, "induction"); Instruction *FirstInst = nullptr; - for (unsigned i = 0; i < NumPointers; ++i) { - Value *Ptr = PtrRtCheck.Pointers[i]; + for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { + const RuntimePointerChecking::CheckingPtrGroup &CG = + PtrRtChecking.CheckingGroups[i]; + Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue; const SCEV *Sc = SE->getSCEV(Ptr); if (SE->isLoopInvariant(Sc, TheLoop)) { - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << - *Ptr <<"\n"); + DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr + << "\n"); Starts.push_back(Ptr); Ends.push_back(Ptr); } else { - DEBUG(dbgs() << "LAA: Adding RT check for range:" << *Ptr << '\n'); unsigned AS = Ptr->getType()->getPointerAddressSpace(); // Use this type for pointer arithmetic. Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); + Value *Start = nullptr, *End = nullptr; - Value *Start = Exp.expandCodeFor(PtrRtCheck.Starts[i], PtrArithTy, Loc); - Value *End = Exp.expandCodeFor(PtrRtCheck.Ends[i], PtrArithTy, Loc); + DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc); + DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n"); Starts.push_back(Start); Ends.push_back(End); } @@ -1394,9 +1608,14 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( IRBuilder<> ChkBuilder(Loc); // Our instructions might fold to a constant. 
Value *MemoryRuntimeCheck = nullptr; - for (unsigned i = 0; i < NumPointers; ++i) { - for (unsigned j = i+1; j < NumPointers; ++j) { - if (!PtrRtCheck.needsChecking(i, j, PtrPartition)) + for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { + for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) { + const RuntimePointerChecking::CheckingPtrGroup &CGI = + PtrRtChecking.CheckingGroups[i]; + const RuntimePointerChecking::CheckingPtrGroup &CGJ = + PtrRtChecking.CheckingGroups[j]; + + if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition)) continue; unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); @@ -1447,7 +1666,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), + : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { @@ -1457,7 +1676,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (CanVecMem) { - if (PtrRtCheck.Need) + if (PtrRtChecking.Need) OS.indent(Depth) << "Memory dependences are safe with run-time checks\n"; else OS.indent(Depth) << "Memory dependences are safe\n"; @@ -1476,7 +1695,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; // List the pair of accesses need run-time checks to prove independence. - PtrRtCheck.print(OS, Depth); + PtrRtChecking.print(OS, Depth); OS << "\n"; OS.indent(Depth) << "Store to invariant address was " diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp index 7617622b9ab6..322a9a80de4c 100644 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ b/lib/Analysis/NoAliasAnalysis.cpp @@ -72,7 +72,6 @@ namespace { } void deleteValue(Value *V) override {} - void copyValue(Value *From, Value *To) override {} void addEscapingUse(Use &U) override {} /// getAdjustedAnalysisPointer - This method is used when a pass implements diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 520d1e5ef87d..7d1c3fbef68a 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -28,12 +28,12 @@ namespace { /// /// This is used when no target specific information is available. struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> { - explicit NoTTIImpl(const DataLayout *DL) + explicit NoTTIImpl(const DataLayout &DL) : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {} }; } -TargetTransformInfo::TargetTransformInfo(const DataLayout *DL) +TargetTransformInfo::TargetTransformInfo(const DataLayout &DL) : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {} TargetTransformInfo::~TargetTransformInfo() {} @@ -304,7 +304,7 @@ TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) { char TargetIRAnalysis::PassID; TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) { - return Result(&F.getParent()->getDataLayout()); + return Result(F.getParent()->getDataLayout()); } // Register the basic pass. 
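As an aside for illustration (a hypothetical helper, not part of the change set): with TargetTransformInfo now taking the DataLayout by reference rather than by pointer, the fallback analysis can be built directly from a function's module, mirroring getDefaultTTI above, and the old "no DataLayout" case disappears.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

// Hypothetical helper shown only to illustrate the new by-reference API.
static llvm::TargetTransformInfo makeNoOpTTI(llvm::Function &F) {
  // Module::getDataLayout() returns a reference, so there is no null
  // pointer left to check for.
  return llvm::TargetTransformInfo(F.getParent()->getDataLayout());
}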
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index c45005f343d3..fa0d7798cae9 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1464,7 +1464,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, // If the object is defined in the current Module, we'll be giving // it the preferred alignment. Otherwise, we have to assume that it // may only have the minimum ABI alignment. - if (!GVar->isDeclaration() && !GVar->isWeakForLinker()) + if (GVar->isStrongDefinitionForLinker()) Align = DL.getPreferredAlignment(GVar); else Align = DL.getABITypeAlignment(ObjectType); diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 96fddd103cc5..67f68dc8391e 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -11,7 +11,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Value.h" /// \brief Identify if the intrinsic is trivially vectorizable. /// This method returns true if the intrinsic's argument types are all @@ -211,3 +217,195 @@ llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI, return Intrinsic::not_intrinsic; } + +/// \brief Find the operand of the GEP that should be checked for consecutive +/// stores. This ignores trailing indices that have no effect on the final +/// pointer. +unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) { + const DataLayout &DL = Gep->getModule()->getDataLayout(); + unsigned LastOperand = Gep->getNumOperands() - 1; + unsigned GEPAllocSize = DL.getTypeAllocSize( + cast<PointerType>(Gep->getType()->getScalarType())->getElementType()); + + // Walk backwards and try to peel off zeros. + while (LastOperand > 1 && + match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) { + // Find the type we're currently indexing into. + gep_type_iterator GEPTI = gep_type_begin(Gep); + std::advance(GEPTI, LastOperand - 1); + + // If it's a type with the same allocation size as the result of the GEP we + // can peel off the zero index. + if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize) + break; + --LastOperand; + } + + return LastOperand; +} + +/// \brief If the argument is a GEP, then returns the operand identified by +/// getGEPInductionOperand. However, if there is some other non-loop-invariant +/// operand, it returns that instead. +llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE, + Loop *Lp) { + GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); + if (!GEP) + return Ptr; + + unsigned InductionOperand = getGEPInductionOperand(GEP); + + // Check that all of the gep indices are uniform except for our induction + // operand. + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) + if (i != InductionOperand && + !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp)) + return Ptr; + return GEP->getOperand(InductionOperand); +} + +/// \brief If a value has only one user that is a CastInst, return it. 
+llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) { + llvm::Value *UniqueCast = nullptr; + for (User *U : Ptr->users()) { + CastInst *CI = dyn_cast<CastInst>(U); + if (CI && CI->getType() == Ty) { + if (!UniqueCast) + UniqueCast = CI; + else + return nullptr; + } + } + return UniqueCast; +} + +/// \brief Get the stride of a pointer access in a loop. Looks for symbolic +/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise. +llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE, + Loop *Lp) { + const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); + if (!PtrTy || PtrTy->isAggregateType()) + return nullptr; + + // Try to remove a gep instruction to make the pointer (actually index at this + // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the + // pointer, otherwise, we are analyzing the index. + llvm::Value *OrigPtr = Ptr; + + // The size of the pointer access. + int64_t PtrAccessSize = 1; + + Ptr = stripGetElementPtr(Ptr, SE, Lp); + const SCEV *V = SE->getSCEV(Ptr); + + if (Ptr != OrigPtr) + // Strip off casts. + while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) + V = C->getOperand(); + + const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V); + if (!S) + return nullptr; + + V = S->getStepRecurrence(*SE); + if (!V) + return nullptr; + + // Strip off the size of access multiplication if we are still analyzing the + // pointer. + if (OrigPtr == Ptr) { + const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout(); + DL.getTypeAllocSize(PtrTy->getElementType()); + if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) { + if (M->getOperand(0)->getSCEVType() != scConstant) + return nullptr; + + const APInt &APStepVal = + cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue(); + + // Huge step value - give up. + if (APStepVal.getBitWidth() > 64) + return nullptr; + + int64_t StepVal = APStepVal.getSExtValue(); + if (PtrAccessSize != StepVal) + return nullptr; + V = M->getOperand(1); + } + } + + // Strip off casts. + Type *StripedOffRecurrenceCast = nullptr; + if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) { + StripedOffRecurrenceCast = C->getType(); + V = C->getOperand(); + } + + // Look for the loop invariant symbolic value. + const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V); + if (!U) + return nullptr; + + llvm::Value *Stride = U->getValue(); + if (!Lp->isLoopInvariant(Stride)) + return nullptr; + + // If we have stripped off the recurrence cast we have to make sure that we + // return the value that is used in this loop so that we can replace it later. + if (StripedOffRecurrenceCast) + Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast); + + return Stride; +} + +/// \brief Given a vector and an element number, see if the scalar value is +/// already around as a register, for example if it were inserted then extracted +/// from the vector. +llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) { + assert(V->getType()->isVectorTy() && "Not looking at a vector?"); + VectorType *VTy = cast<VectorType>(V->getType()); + unsigned Width = VTy->getNumElements(); + if (EltNo >= Width) // Out of range access. + return UndefValue::get(VTy->getElementType()); + + if (Constant *C = dyn_cast<Constant>(V)) + return C->getAggregateElement(EltNo); + + if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { + // If this is an insert to a variable element, we don't know what it is. 
+ if (!isa<ConstantInt>(III->getOperand(2))) + return nullptr; + unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); + + // If this is an insert to the element we are looking for, return the + // inserted value. + if (EltNo == IIElt) + return III->getOperand(1); + + // Otherwise, the insertelement doesn't modify the value, recurse on its + // vector input. + return findScalarElement(III->getOperand(0), EltNo); + } + + if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { + unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); + int InEl = SVI->getMaskValue(EltNo); + if (InEl < 0) + return UndefValue::get(VTy->getElementType()); + if (InEl < (int)LHSWidth) + return findScalarElement(SVI->getOperand(0), InEl); + return findScalarElement(SVI->getOperand(1), InEl - LHSWidth); + } + + // Extract a value from a vector add operation with a constant zero. + Value *Val = nullptr; Constant *Con = nullptr; + if (match(V, + llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val), + llvm::PatternMatch::m_Constant(Con)))) { + if (Con->getAggregateElement(EltNo)->isNullValue()) + return findScalarElement(Val, EltNo); + } + + // Otherwise, we don't know. + return nullptr; +} diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 88f359d4fd5c..5c4bab734b2b 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -593,6 +593,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(attributes); KEYWORD(alwaysinline); + KEYWORD(argmemonly); KEYWORD(builtin); KEYWORD(byval); KEYWORD(inalloca); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index b3c7fa087d40..1c6e7bd18d0e 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -946,35 +946,42 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, B.addStackAlignmentAttr(Alignment); continue; } - case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; - case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break; - case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; - case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break; - case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; - case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break; - case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; - case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; - case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break; - case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; - case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break; - case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break; - case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; - case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; - case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; - case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; - case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break; - case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; - case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; - case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; - case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; - case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; - case lltok::kw_sspreq: 
B.addAttribute(Attribute::StackProtectReq); break; - case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break; - case lltok::kw_safestack: B.addAttribute(Attribute::SafeStack); break; - case lltok::kw_sanitize_address: B.addAttribute(Attribute::SanitizeAddress); break; - case lltok::kw_sanitize_thread: B.addAttribute(Attribute::SanitizeThread); break; - case lltok::kw_sanitize_memory: B.addAttribute(Attribute::SanitizeMemory); break; - case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; + case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; + case lltok::kw_argmemonly: B.addAttribute(Attribute::ArgMemOnly); break; + case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break; + case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; + case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break; + case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; + case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break; + case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; + case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; + case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break; + case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; + case lltok::kw_noimplicitfloat: + B.addAttribute(Attribute::NoImplicitFloat); break; + case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break; + case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; + case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; + case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; + case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; + case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break; + case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; + case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; + case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; + case lltok::kw_returns_twice: + B.addAttribute(Attribute::ReturnsTwice); break; + case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; + case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; + case lltok::kw_sspstrong: + B.addAttribute(Attribute::StackProtectStrong); break; + case lltok::kw_safestack: B.addAttribute(Attribute::SafeStack); break; + case lltok::kw_sanitize_address: + B.addAttribute(Attribute::SanitizeAddress); break; + case lltok::kw_sanitize_thread: + B.addAttribute(Attribute::SanitizeThread); break; + case lltok::kw_sanitize_memory: + B.addAttribute(Attribute::SanitizeMemory); break; + case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; // Error handling. 
case lltok::kw_inreg: @@ -1258,6 +1265,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_alignstack: case lltok::kw_alwaysinline: + case lltok::kw_argmemonly: case lltok::kw_builtin: case lltok::kw_inlinehint: case lltok::kw_jumptable: @@ -1334,6 +1342,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_alignstack: case lltok::kw_alwaysinline: + case lltok::kw_argmemonly: case lltok::kw_builtin: case lltok::kw_cold: case lltok::kw_inlinehint: @@ -2873,8 +2882,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (ValTy->isVectorTy() != BaseType->isVectorTy()) return Error(ID.Loc, "getelementptr index type missmatch"); if (ValTy->isVectorTy()) { - unsigned ValNumEl = cast<VectorType>(ValTy)->getNumElements(); - unsigned PtrNumEl = cast<VectorType>(BaseType)->getNumElements(); + unsigned ValNumEl = ValTy->getVectorNumElements(); + unsigned PtrNumEl = BaseType->getVectorNumElements(); if (ValNumEl != PtrNumEl) return Error( ID.Loc, @@ -4534,8 +4543,17 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_and: case lltok::kw_or: case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal); - case lltok::kw_icmp: - case lltok::kw_fcmp: return ParseCompare(Inst, PFS, KeywordVal); + case lltok::kw_icmp: return ParseCompare(Inst, PFS, KeywordVal); + case lltok::kw_fcmp: { + FastMathFlags FMF = EatFastMathFlagsIfPresent(); + int Res = ParseCompare(Inst, PFS, KeywordVal); + if (Res != 0) + return Res; + if (FMF.any()) + Inst->setFastMathFlags(FMF); + return 0; + } + // Casts. case lltok::kw_trunc: case lltok::kw_zext: @@ -5572,6 +5590,11 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { SmallVector<Value*, 16> Indices; bool AteExtraComma = false; + // GEP returns a vector of pointers if at least one of parameters is a vector. + // All vector parameters should have the same vector width. + unsigned GEPWidth = BaseType->isVectorTy() ? 
+ BaseType->getVectorNumElements() : 0; + while (EatIfPresent(lltok::comma)) { if (Lex.getKind() == lltok::MetadataVar) { AteExtraComma = true; @@ -5580,14 +5603,13 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; if (!Val->getType()->getScalarType()->isIntegerTy()) return Error(EltLoc, "getelementptr index must be an integer"); - if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy()) - return Error(EltLoc, "getelementptr index type missmatch"); + if (Val->getType()->isVectorTy()) { - unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements(); - unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements(); - if (ValNumEl != PtrNumEl) + unsigned ValNumEl = Val->getType()->getVectorNumElements(); + if (GEPWidth && GEPWidth != ValNumEl) return Error(EltLoc, "getelementptr vector index has a wrong number of elements"); + GEPWidth = ValNumEl; } Indices.push_back(Val); } diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 2487d1208133..691f085f0c9f 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -100,6 +100,7 @@ namespace lltok { // Attributes: kw_attributes, kw_alwaysinline, + kw_argmemonly, kw_sanitize_address, kw_builtin, kw_byval, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 09f0b689bdc3..c04e8b9f1f37 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -697,6 +697,21 @@ static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) { } } +static FastMathFlags getDecodedFastMathFlags(unsigned Val) { + FastMathFlags FMF; + if (0 != (Val & FastMathFlags::UnsafeAlgebra)) + FMF.setUnsafeAlgebra(); + if (0 != (Val & FastMathFlags::NoNaNs)) + FMF.setNoNaNs(); + if (0 != (Val & FastMathFlags::NoInfs)) + FMF.setNoInfs(); + if (0 != (Val & FastMathFlags::NoSignedZeros)) + FMF.setNoSignedZeros(); + if (0 != (Val & FastMathFlags::AllowReciprocal)) + FMF.setAllowReciprocal(); + return FMF; +} + static void upgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) { switch (Val) { case 5: GV->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break; @@ -1075,6 +1090,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Alignment; case bitc::ATTR_KIND_ALWAYS_INLINE: return Attribute::AlwaysInline; + case bitc::ATTR_KIND_ARGMEMONLY: + return Attribute::ArgMemOnly; case bitc::ATTR_KIND_BUILTIN: return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: @@ -3472,17 +3489,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { if (Record[OpNum] & (1 << bitc::PEO_EXACT)) cast<BinaryOperator>(I)->setIsExact(true); } else if (isa<FPMathOperator>(I)) { - FastMathFlags FMF; - if (0 != (Record[OpNum] & FastMathFlags::UnsafeAlgebra)) - FMF.setUnsafeAlgebra(); - if (0 != (Record[OpNum] & FastMathFlags::NoNaNs)) - FMF.setNoNaNs(); - if (0 != (Record[OpNum] & FastMathFlags::NoInfs)) - FMF.setNoInfs(); - if (0 != (Record[OpNum] & FastMathFlags::NoSignedZeros)) - FMF.setNoSignedZeros(); - if (0 != (Record[OpNum] & FastMathFlags::AllowReciprocal)) - FMF.setAllowReciprocal(); + FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]); if (FMF.any()) I->setFastMathFlags(FMF); } @@ -3739,14 +3746,25 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || - popValue(Record, OpNum, NextValueNo, LHS->getType(), 
RHS) || - OpNum+1 != Record.size()) + popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS)) + return error("Invalid record"); + + unsigned PredVal = Record[OpNum]; + bool IsFP = LHS->getType()->isFPOrFPVectorTy(); + FastMathFlags FMF; + if (IsFP && Record.size() > OpNum+1) + FMF = getDecodedFastMathFlags(Record[++OpNum]); + + if (OpNum+1 != Record.size()) return error("Invalid record"); if (LHS->getType()->isFPOrFPVectorTy()) - I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); + I = new FCmpInst((FCmpInst::Predicate)PredVal, LHS, RHS); else - I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); + I = new ICmpInst((ICmpInst::Predicate)PredVal, LHS, RHS); + + if (FMF.any()) + I->setFastMathFlags(FMF); InstructionList.push_back(I); break; } @@ -4458,14 +4476,11 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) { // Upgrade any old intrinsic calls in the function. for (auto &I : UpgradedIntrinsics) { - if (I.first != I.second) { - for (auto UI = I.first->user_begin(), UE = I.first->user_end(); - UI != UE;) { - User *U = *UI; - ++UI; - if (CallInst *CI = dyn_cast<CallInst>(U)) - UpgradeIntrinsicCall(CI, I.second); - } + for (auto UI = I.first->user_begin(), UE = I.first->user_end(); UI != UE;) { + User *U = *UI; + ++UI; + if (CallInst *CI = dyn_cast<CallInst>(U)) + UpgradeIntrinsicCall(CI, I.second); } } @@ -4533,15 +4548,13 @@ std::error_code BitcodeReader::materializeModule(Module *M) { // module is materialized because there could always be another function body // with calls to the old function. for (auto &I : UpgradedIntrinsics) { - if (I.first != I.second) { - for (auto *U : I.first->users()) { - if (CallInst *CI = dyn_cast<CallInst>(U)) - UpgradeIntrinsicCall(CI, I.second); - } - if (!I.first->use_empty()) - I.first->replaceAllUsesWith(I.second); - I.first->eraseFromParent(); + for (auto *U : I.first->users()) { + if (CallInst *CI = dyn_cast<CallInst>(U)) + UpgradeIntrinsicCall(CI, I.second); } + if (!I.first->use_empty()) + I.first->replaceAllUsesWith(I.second); + I.first->eraseFromParent(); } UpgradedIntrinsics.clear(); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 622f7eaf0784..1a70ba5ac127 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -162,6 +162,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_ALIGNMENT; case Attribute::AlwaysInline: return bitc::ATTR_KIND_ALWAYS_INLINE; + case Attribute::ArgMemOnly: + return bitc::ATTR_KIND_ARGMEMONLY; case Attribute::Builtin: return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: @@ -1759,13 +1761,17 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, pushValue(I.getOperand(2), InstID, Vals, VE); break; case Instruction::ICmp: - case Instruction::FCmp: + case Instruction::FCmp: { // compare returning Int1Ty or vector of Int1Ty Code = bitc::FUNC_CODE_INST_CMP2; PushValueAndType(I.getOperand(0), InstID, Vals, VE); pushValue(I.getOperand(1), InstID, Vals, VE); Vals.push_back(cast<CmpInst>(I).getPredicate()); + uint64_t Flags = GetOptimizationFlags(&I); + if (Flags != 0) + Vals.push_back(Flags); break; + } case Instruction::Ret: { diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 3224fac25cb4..98d4c8afc7b9 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -81,27 +81,27 @@ unsigned llvm::ComputeLinearIndex(Type *Ty, /// If Offsets is non-null, it points to a vector to be filled in /// with the in-memory 
offsets of each of the individual values. /// -void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, +void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl<EVT> &ValueVTs, SmallVectorImpl<uint64_t> *Offsets, uint64_t StartingOffset) { // Given a struct type, recursively traverse the elements. if (StructType *STy = dyn_cast<StructType>(Ty)) { - const StructLayout *SL = TLI.getDataLayout()->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); for (StructType::element_iterator EB = STy->element_begin(), EI = EB, EE = STy->element_end(); EI != EE; ++EI) - ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets, StartingOffset + SL->getElementOffset(EI - EB)); return; } // Given an array type, recursively traverse the elements. if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { Type *EltTy = ATy->getElementType(); - uint64_t EltSize = TLI.getDataLayout()->getTypeAllocSize(EltTy); + uint64_t EltSize = DL.getTypeAllocSize(EltTy); for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) - ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets, StartingOffset + i * EltSize); return; } @@ -109,7 +109,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, Type *Ty, if (Ty->isVoidTy()) return; // Base case: we can get an EVT for this LLVM IR type. - ValueVTs.push_back(TLI.getValueType(Ty)); + ValueVTs.push_back(TLI.getValueType(DL, Ty)); if (Offsets) Offsets->push_back(StartingOffset); } @@ -233,7 +233,8 @@ static bool isNoopBitcast(Type *T1, Type *T2, static const Value *getNoopInput(const Value *V, SmallVectorImpl<unsigned> &ValLoc, unsigned &DataBits, - const TargetLoweringBase &TLI) { + const TargetLoweringBase &TLI, + const DataLayout &DL) { while (true) { // Try to look through V1; if V1 is not an instruction, it can't be looked // through. @@ -255,16 +256,16 @@ static const Value *getNoopInput(const Value *V, // Make sure this isn't a truncating or extending cast. We could // support this eventually, but don't bother for now. if (!isa<VectorType>(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(Op->getType())->getBitWidth()) + DL.getPointerSizeInBits() == + cast<IntegerType>(Op->getType())->getBitWidth()) NoopInput = Op; } else if (isa<PtrToIntInst>(I)) { // Look through ptrtoint. // Make sure this isn't a truncating or extending cast. We could // support this eventually, but don't bother for now. if (!isa<VectorType>(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast<IntegerType>(I->getType())->getBitWidth()) + DL.getPointerSizeInBits() == + cast<IntegerType>(I->getType())->getBitWidth()) NoopInput = Op; } else if (isa<TruncInst>(I) && TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { @@ -331,14 +332,15 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, SmallVectorImpl<unsigned> &RetIndices, SmallVectorImpl<unsigned> &CallIndices, bool AllowDifferingSizes, - const TargetLoweringBase &TLI) { + const TargetLoweringBase &TLI, + const DataLayout &DL) { // Trace the sub-value needed by the return value as far back up the graph as // possible, in the hope that it will intersect with the value produced by the // call. In the simple case with no "returned" attribute, the hope is actually // that we end up back at the tail call instruction itself. 
unsigned BitsRequired = UINT_MAX; - RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI); + RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL); // If this slot in the value returned is undef, it doesn't matter what the // call puts there, it'll be fine. @@ -350,7 +352,7 @@ static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, // a "returned" attribute, the search will be blocked immediately and the loop // a Noop. unsigned BitsProvided = UINT_MAX; - CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI); + CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL); // There's no hope if we can't actually trace them to (the same part of!) the // same value. @@ -606,7 +608,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, // Finally, we can check whether the value produced by the tail call at this // index is compatible with the value we return. if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, - AllowDifferingSizes, TLI)) + AllowDifferingSizes, TLI, + F->getParent()->getDataLayout())) return false; CallEmpty = !nextRealType(CallSubTypes, CallPath); diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 4cb460a7bbfc..0bad7954b980 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -69,24 +69,32 @@ void ARMException::beginFunction(const MachineFunction *MF) { /// void ARMException::endFunction(const MachineFunction *MF) { ARMTargetStreamer &ATS = getTargetStreamer(); + const Function *F = MF->getFunction(); + const Function *Per = nullptr; + if (F->hasPersonalityFn()) + Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + assert(!MMI->getPersonality() || Per == MMI->getPersonality()); + bool forceEmitPersonality = + F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + F->needsUnwindTableEntry(); + bool shouldEmitPersonality = forceEmitPersonality || + !MMI->getLandingPads().empty(); if (!Asm->MF->getFunction()->needsUnwindTableEntry() && - MMI->getLandingPads().empty()) + !shouldEmitPersonality) ATS.emitCantUnwind(); - else { - if (!MMI->getLandingPads().empty()) { - // Emit references to personality. - if (const Function *Personality = MMI->getPersonality()) { - MCSymbol *PerSym = Asm->getSymbol(Personality); - Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global); - ATS.emitPersonality(PerSym); - } - - // Emit .handlerdata directive. - ATS.emitHandlerData(); - - // Emit actual exception table - emitExceptionTable(); + else if (shouldEmitPersonality) { + // Emit references to personality. + if (Per) { + MCSymbol *PerSym = Asm->getSymbol(Per); + Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global); + ATS.emitPersonality(PerSym); } + + // Emit .handlerdata directive. 
+ ATS.emitHandlerData(); + + // Emit actual exception table + emitExceptionTable(); } if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 8a7e9f991611..125047e7bbb5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -820,7 +819,7 @@ void AsmPrinter::EmitFunctionBody() { emitCFIInstruction(MI); break; - case TargetOpcode::FRAME_ALLOC: + case TargetOpcode::LOCAL_ESCAPE: emitFrameAlloc(MI); break; @@ -1024,7 +1023,7 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit visibility info for declarations for (const Function &F : M) { - if (!F.isDeclaration()) + if (!F.isDeclarationForLinker()) continue; GlobalValue::VisibilityTypes V = F.getVisibility(); if (V == GlobalValue::DefaultVisibility) diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 0bc873e326be..2c212c7ecee1 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -89,6 +89,7 @@ void DwarfCFIException::endModule() { void DwarfCFIException::beginFunction(const MachineFunction *MF) { shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + const Function *F = MF->getFunction(); // If any landing pads survive, we need an EH table. bool hasLandingPads = !MMI->getLandingPads().empty(); @@ -104,10 +105,24 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MMI->getPersonality(); - - shouldEmitPersonality = hasLandingPads && - PerEncoding != dwarf::DW_EH_PE_omit && Per; + const Function *Per = nullptr; + if (F->hasPersonalityFn()) + Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + assert(!MMI->getPersonality() || Per == MMI->getPersonality()); + + // Emit a personality function even when there are no landing pads + bool forceEmitPersonality = + // ...if a personality function is explicitly specified + F->hasPersonalityFn() && + // ... and it's not known to be a noop in the absence of invokes + !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + // ... 
and we're not explicitly asked not to emit it + F->needsUnwindTableEntry(); + + shouldEmitPersonality = + (forceEmitPersonality || + (hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit)) && + Per; unsigned LSDAEncoding = TLOF.getLSDAEncoding(); shouldEmitLSDA = shouldEmitPersonality && @@ -123,6 +138,11 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { if (!shouldEmitPersonality) return; + // If we are forced to emit this personality, make sure to record + // it because it might not appear in any landingpad + if (forceEmitPersonality) + MMI->addPersonality(Per); + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 1c3e2aec64ab..01f34c6eb81c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -49,7 +49,7 @@ class DwarfUnit; class MachineModuleInfo; //===----------------------------------------------------------------------===// -/// \brief This class is used to record source line correspondence. +/// This class is used to record source line correspondence. class SrcLineInfo { unsigned Line; // Source line number. unsigned Column; // Source column. @@ -161,7 +161,7 @@ public: return dwarf::DW_TAG_variable; } - /// \brief Return true if DbgVariable is artificial. + /// Return true if DbgVariable is artificial. bool isArtificial() const { if (Var->isArtificial()) return true; @@ -190,149 +190,152 @@ public: const DIType *getType() const; private: - /// resolve - Look in the DwarfDebug map for the MDNode that + /// Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. template <typename T> T *resolve(TypedDINodeRef<T> Ref) const; }; -/// \brief Helper used to pair up a symbol and its DWARF compile unit. +/// Helper used to pair up a symbol and its DWARF compile unit. struct SymbolCU { SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} const MCSymbol *Sym; DwarfCompileUnit *CU; }; -/// \brief Collects and handles dwarf debug information. +/// Collects and handles dwarf debug information. class DwarfDebug : public AsmPrinterHandler { - // Target of Dwarf emission. + /// Target of Dwarf emission. AsmPrinter *Asm; - // Collected machine module information. + /// Collected machine module information. MachineModuleInfo *MMI; - // All DIEValues are allocated through this allocator. + /// All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - // Maps MDNode with its corresponding DwarfCompileUnit. + /// Maps MDNode with its corresponding DwarfCompileUnit. MapVector<const MDNode *, DwarfCompileUnit *> CUMap; - // Maps subprogram MDNode with its corresponding DwarfCompileUnit. + /// Maps subprogram MDNode with its corresponding DwarfCompileUnit. MapVector<const MDNode *, DwarfCompileUnit *> SPMap; - // Maps a CU DIE with its corresponding DwarfCompileUnit. + /// Maps a CU DIE with its corresponding DwarfCompileUnit. DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap; - // List of all labels used in aranges generation. + /// List of all labels used in aranges generation. std::vector<SymbolCU> ArangeLabels; - // Size of each symbol emitted (for those symbols that have a specific size). + /// Size of each symbol emitted (for those symbols that have a specific size). DenseMap<const MCSymbol *, uint64_t> SymSize; LexicalScopes LScopes; - // Collection of abstract variables. 
+ /// Collection of abstract variables. DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables; - // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists - // can refer to them in spite of insertions into this list. + /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists + /// can refer to them in spite of insertions into this list. DebugLocStream DebugLocs; - // This is a collection of subprogram MDNodes that are processed to - // create DIEs. + /// This is a collection of subprogram MDNodes that are processed to + /// create DIEs. SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; - // Maps instruction with label emitted before instruction. + /// Maps instruction with label emitted before instruction. DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn; - // Maps instruction with label emitted after instruction. + /// Maps instruction with label emitted after instruction. DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; - // History of DBG_VALUE and clobber instructions for each user variable. - // Variables are listed in order of appearance. + /// History of DBG_VALUE and clobber instructions for each user + /// variable. Variables are listed in order of appearance. DbgValueHistoryMap DbgValues; - // Previous instruction's location information. This is used to determine - // label location to indicate scope boundries in dwarf debug info. + /// Previous instruction's location information. This is used to + /// determine label location to indicate scope boundries in dwarf + /// debug info. DebugLoc PrevInstLoc; MCSymbol *PrevLabel; - // This location indicates end of function prologue and beginning of function - // body. + /// This location indicates end of function prologue and beginning of + /// function body. DebugLoc PrologEndLoc; - // If nonnull, stores the current machine function we're processing. + /// If nonnull, stores the current machine function we're processing. const MachineFunction *CurFn; - // If nonnull, stores the current machine instruction we're processing. + /// If nonnull, stores the current machine instruction we're processing. const MachineInstr *CurMI; - // If nonnull, stores the CU in which the previous subprogram was contained. + /// If nonnull, stores the CU in which the previous subprogram was contained. const DwarfCompileUnit *PrevCU; - // As an optimization, there is no need to emit an entry in the directory - // table for the same directory as DW_AT_comp_dir. + /// As an optimization, there is no need to emit an entry in the directory + /// table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; - // Holder for the file specific debug information. + /// Holder for the file specific debug information. DwarfFile InfoHolder; - // Holders for the various debug information flags that we might need to - // have exposed. See accessor functions below for description. + /// Holders for the various debug information flags that we might need to + /// have exposed. See accessor functions below for description. - // Holder for imported entities. + /// Holder for imported entities. typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32> ImportedEntityMap; ImportedEntityMap ScopesWithImportedEntities; - // Map from MDNodes for user-defined types to the type units that describe - // them. + /// Map from MDNodes for user-defined types to the type units that + /// describe them. 
DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits; SmallVector< std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1> TypeUnitsUnderConstruction; - // Whether to emit the pubnames/pubtypes sections. + /// Whether to emit the pubnames/pubtypes sections. bool HasDwarfPubSections; - // Whether or not to use AT_ranges for compilation units. + /// Whether or not to use AT_ranges for compilation units. bool HasCURanges; - // Whether we emitted a function into a section other than the default - // text. + /// Whether we emitted a function into a section other than the + /// default text. bool UsedNonDefaultText; - // Whether to use the GNU TLS opcode (instead of the standard opcode). + /// Whether to use the GNU TLS opcode (instead of the standard opcode). bool UseGNUTLSOpcode; - // Version of dwarf we're emitting. + /// Version of dwarf we're emitting. unsigned DwarfVersion; - // Maps from a type identifier to the actual MDNode. + /// Maps from a type identifier to the actual MDNode. DITypeIdentifierMap TypeIdentifierMap; - // DWARF5 Experimental Options + /// DWARF5 Experimental Options + /// @{ bool HasDwarfAccelTables; bool HasSplitDwarf; - // Separated Dwarf Variables - // In general these will all be for bits that are left in the - // original object file, rather than things that are meant - // to be in the .dwo sections. + /// Separated Dwarf Variables + /// In general these will all be for bits that are left in the + /// original object file, rather than things that are meant + /// to be in the .dwo sections. - // Holder for the skeleton information. + /// Holder for the skeleton information. DwarfFile SkeletonHolder; - /// Store file names for type units under fission in a line table header that - /// will be emitted into debug_line.dwo. - // FIXME: replace this with a map from comp_dir to table so that we can emit - // multiple tables during LTO each of which uses directory 0, referencing the - // comp_dir of all the type units that use it. + /// Store file names for type units under fission in a line table + /// header that will be emitted into debug_line.dwo. + // FIXME: replace this with a map from comp_dir to table so that we + // can emit multiple tables during LTO each of which uses directory + // 0, referencing the comp_dir of all the type units that use it. MCDwarfDwoLineTable SplitTypeUnitFileTable; - - // True iff there are multiple CUs in this module. + /// @} + + /// True iff there are multiple CUs in this module. bool SingleCU; bool IsDarwin; bool IsPS4; @@ -354,7 +357,7 @@ class DwarfDebug : public AsmPrinterHandler { typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; - /// \brief Find abstract variable associated with Var. + /// Find abstract variable associated with Var. DbgVariable *getExistingAbstractVariable(InlinedVariable IV, const DILocalVariable *&Cleansed); DbgVariable *getExistingAbstractVariable(InlinedVariable IV); @@ -366,56 +369,56 @@ class DwarfDebug : public AsmPrinterHandler { DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV); - /// \brief Construct a DIE for this abstract scope. + /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); - /// \brief Compute the size and offset of a DIE given an incoming Offset. + /// Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); - /// \brief Compute the size and offset of all the DIEs. 
+ /// Compute the size and offset of all the DIEs. void computeSizeAndOffsets(); - /// \brief Collect info for variables that were optimized out. + /// Collect info for variables that were optimized out. void collectDeadVariables(); void finishVariableDefinitions(); void finishSubprogramDefinitions(); - /// \brief Finish off debug information after all functions have been + /// Finish off debug information after all functions have been /// processed. void finalizeModuleInfo(); - /// \brief Emit the debug info section. + /// Emit the debug info section. void emitDebugInfo(); - /// \brief Emit the abbreviation section. + /// Emit the abbreviation section. void emitAbbreviations(); - /// \brief Emit a specified accelerator table. + /// Emit a specified accelerator table. void emitAccel(DwarfAccelTable &Accel, MCSection *Section, StringRef TableName); - /// \brief Emit visible names into a hashed accelerator table section. + /// Emit visible names into a hashed accelerator table section. void emitAccelNames(); - /// \brief Emit objective C classes and categories into a hashed + /// Emit objective C classes and categories into a hashed /// accelerator table section. void emitAccelObjC(); - /// \brief Emit namespace dies into a hashed accelerator table. + /// Emit namespace dies into a hashed accelerator table. void emitAccelNamespaces(); - /// \brief Emit type dies into a hashed accelerator table. + /// Emit type dies into a hashed accelerator table. void emitAccelTypes(); - /// \brief Emit visible names into a debug pubnames section. + /// Emit visible names into a debug pubnames section. /// \param GnuStyle determines whether or not we want to emit /// additional information into the table ala newer gcc for gdb /// index. void emitDebugPubNames(bool GnuStyle = false); - /// \brief Emit visible types into a debug pubtypes section. + /// Emit visible types into a debug pubtypes section. /// \param GnuStyle determines whether or not we want to emit /// additional information into the table ala newer gcc for gdb /// index. @@ -425,91 +428,91 @@ class DwarfDebug : public AsmPrinterHandler { bool GnuStyle, MCSection *PSec, StringRef Name, const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const); - /// \brief Emit visible names into a debug str section. + /// Emit visible names into a debug str section. void emitDebugStr(); - /// \brief Emit visible names into a debug loc section. + /// Emit visible names into a debug loc section. void emitDebugLoc(); - /// \brief Emit visible names into a debug loc dwo section. + /// Emit visible names into a debug loc dwo section. void emitDebugLocDWO(); - /// \brief Emit visible names into a debug aranges section. + /// Emit visible names into a debug aranges section. void emitDebugARanges(); - /// \brief Emit visible names into a debug ranges section. + /// Emit visible names into a debug ranges section. void emitDebugRanges(); - /// \brief Emit inline info using custom format. + /// Emit inline info using custom format. void emitDebugInlineInfo(); /// DWARF 5 Experimental Split Dwarf Emitters - /// \brief Initialize common features of skeleton units. + /// Initialize common features of skeleton units. void initSkeletonUnit(const DwarfUnit &U, DIE &Die, std::unique_ptr<DwarfUnit> NewU); - /// \brief Construct the split debug info compile unit for the debug info + /// Construct the split debug info compile unit for the debug info /// section. 
DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU); - /// \brief Construct the split debug info compile unit for the debug info + /// Construct the split debug info compile unit for the debug info /// section. DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU); - /// \brief Emit the debug info dwo section. + /// Emit the debug info dwo section. void emitDebugInfoDWO(); - /// \brief Emit the debug abbrev dwo section. + /// Emit the debug abbrev dwo section. void emitDebugAbbrevDWO(); - /// \brief Emit the debug line dwo section. + /// Emit the debug line dwo section. void emitDebugLineDWO(); - /// \brief Emit the debug str dwo section. + /// Emit the debug str dwo section. void emitDebugStrDWO(); /// Flags to let the linker know we have emitted new style pubnames. Only /// emit it here if we don't have a skeleton CU for split dwarf. void addGnuPubAttributes(DwarfUnit &U, DIE &D) const; - /// \brief Create new DwarfCompileUnit for the given metadata node with tag + /// Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit); - /// \brief Construct imported_module or imported_declaration DIE. + /// Construct imported_module or imported_declaration DIE. void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, const DIImportedEntity *N); - /// \brief Register a source line with debug info. Returns the unique + /// Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the /// source line list. void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope, unsigned Flags); - /// \brief Indentify instructions that are marking the beginning of or + /// Indentify instructions that are marking the beginning of or /// ending of a scope. void identifyScopeMarkers(); - /// \brief Populate LexicalScope entries with variables' info. + /// Populate LexicalScope entries with variables' info. void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, DenseSet<InlinedVariable> &ProcessedVars); - /// \brief Build the location list for all DBG_VALUEs in the + /// Build the location list for all DBG_VALUEs in the /// function that describe the same variable. void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, const DbgValueHistoryMap::InstrRanges &Ranges); - /// \brief Collect variable information from the side table maintained + /// Collect variable information from the side table maintained /// by MMI. void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P); - /// \brief Ensure that a label will be emitted before MI. + /// Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { LabelsBeforeInsn.insert(std::make_pair(MI, nullptr)); } - /// \brief Ensure that a label will be emitted after MI. + /// Ensure that a label will be emitted after MI. void requestLabelAfterInsn(const MachineInstr *MI) { LabelsAfterInsn.insert(std::make_pair(MI, nullptr)); } @@ -522,50 +525,50 @@ public: ~DwarfDebug() override; - /// \brief Emit all Dwarf sections that should come prior to the + /// Emit all Dwarf sections that should come prior to the /// content. void beginModule(); - /// \brief Emit all Dwarf sections that should come after the content. + /// Emit all Dwarf sections that should come after the content. void endModule() override; - /// \brief Gather pre-function debug information. + /// Gather pre-function debug information. 
void beginFunction(const MachineFunction *MF) override; - /// \brief Gather and emit post-function debug information. + /// Gather and emit post-function debug information. void endFunction(const MachineFunction *MF) override; - /// \brief Process beginning of an instruction. + /// Process beginning of an instruction. void beginInstruction(const MachineInstr *MI) override; - /// \brief Process end of an instruction. + /// Process end of an instruction. void endInstruction() override; - /// \brief Add a DIE to the set of types that we're going to pull into + /// Add a DIE to the set of types that we're going to pull into /// type units. void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &Die, const DICompositeType *CTy); - /// \brief Add a label so that arange data can be generated for it. + /// Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } - /// \brief For symbols that have a size designated (e.g. common symbols), + /// For symbols that have a size designated (e.g. common symbols), /// this tracks that size. void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override { SymSize[Sym] = Size; } - /// \brief Returns whether to use DW_OP_GNU_push_tls_address, instead of the + /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the /// standard DW_OP_form_tls_address opcode bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } // Experimental DWARF5 features. - /// \brief Returns whether or not to emit tables that dwarf consumers can + /// Returns whether or not to emit tables that dwarf consumers can /// use to accelerate lookup. bool useDwarfAccelTables() const { return HasDwarfAccelTables; } - /// \brief Returns whether or not to change the current debug info for the + /// Returns whether or not to change the current debug info for the /// split dwarf proposal support. bool useSplitDwarf() const { return HasSplitDwarf; } @@ -579,7 +582,7 @@ public: /// Returns the entries for the .debug_loc section. const DebugLocStream &getDebugLocs() const { return DebugLocs; } - /// \brief Emit an entry for the debug loc section. This can be used to + /// Emit an entry for the debug loc section. This can be used to /// handle an entry that's going to be emitted into the debug loc section. void emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocStream::Entry &Entry); @@ -592,7 +595,7 @@ public: return Ref.resolve(TypeIdentifierMap); } - /// \brief Return the TypeIdentifierMap. + /// Return the TypeIdentifierMap. const DITypeIdentifierMap &getTypeIdentifierMap() const { return TypeIdentifierMap; } @@ -627,14 +630,14 @@ public: less_first())); } - /// \brief A helper function to check whether the DIE for a given Scope is + /// A helper function to check whether the DIE for a given Scope is /// going to be null. bool isLexicalScopeDIENull(LexicalScope *Scope); - /// \brief Return Label preceding the instruction. + /// Return Label preceding the instruction. MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); - /// \brief Return Label immediately following the instruction. + /// Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index 4000ae48a856..44d9d2245dda 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -113,7 +113,7 @@ protected: DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); - /// \brief Add a string attribute data and value. + /// Add a string attribute data and value. /// /// This is guaranteed to be in the local string pool instead of indirected. void addLocalString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); @@ -142,10 +142,10 @@ public: unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } - /// \brief Return true if this compile unit has something to write out. + /// Return true if this compile unit has something to write out. bool hasContent() const { return UnitDie.hasChildren(); } - /// \brief Get string containing language specific context for a global name. + /// Get string containing language specific context for a global name. /// /// Walks the metadata parent chain in a language specific manner (using the /// compile unit language) and returns it as a string. This is done at the @@ -162,42 +162,42 @@ public: virtual void addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context) {} - /// \brief Add a new name to the namespace accelerator table. + /// Add a new name to the namespace accelerator table. void addAccelNamespace(StringRef Name, const DIE &Die); - /// \brief Returns the DIE map slot for the specified debug variable. + /// Returns the DIE map slot for the specified debug variable. /// /// We delegate the request to DwarfDebug when the MDNode can be part of the /// type system, since DIEs for the type system can be shared across CUs and /// the mappings are kept in DwarfDebug. DIE *getDIE(const DINode *D) const; - /// \brief Returns a fresh newly allocated DIELoc. + /// Returns a fresh newly allocated DIELoc. DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc; } - /// \brief Insert DIE into the map. + /// Insert DIE into the map. /// /// We delegate the request to DwarfDebug when the MDNode can be part of the /// type system, since DIEs for the type system can be shared across CUs and /// the mappings are kept in DwarfDebug. void insertDIE(const DINode *Desc, DIE *D); - /// \brief Add a flag that is true to the DIE. + /// Add a flag that is true to the DIE. void addFlag(DIE &Die, dwarf::Attribute Attribute); - /// \brief Add an unsigned integer attribute data and value. + /// Add an unsigned integer attribute data and value. void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, uint64_t Integer); void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer); - /// \brief Add an signed integer attribute data and value. + /// Add an signed integer attribute data and value. void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, int64_t Integer); void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer); - /// \brief Add a string attribute data and value. + /// Add a string attribute data and value. /// /// We always emit a reference to the string pool instead of immediate /// strings so that DIEs have more predictable sizes. In the case of split @@ -205,38 +205,38 @@ public: /// into the string table. 
void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); - /// \brief Add a Dwarf label attribute data and value. + /// Add a Dwarf label attribute data and value. DIE::value_iterator addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label); void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); - /// \brief Add an offset into a section attribute data and value. + /// Add an offset into a section attribute data and value. void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer); - /// \brief Add a dwarf op address data and value using the form given and an + /// Add a dwarf op address data and value using the form given and an /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index. void addOpAddress(DIELoc &Die, const MCSymbol *Label); - /// \brief Add a label delta attribute data and value. + /// Add a label delta attribute data and value. void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo); - /// \brief Add a DIE attribute data and value. + /// Add a DIE attribute data and value. void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry); - /// \brief Add a DIE attribute data and value. + /// Add a DIE attribute data and value. void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry); void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type); - /// \brief Add block data. + /// Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block); - /// \brief Add block data. + /// Add block data. void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); - /// \brief Add location information to specified debug information entry. + /// Add location information to specified debug information entry. void addSourceLine(DIE &Die, unsigned Line, StringRef File, StringRef Directory); void addSourceLine(DIE &Die, const DILocalVariable *V); @@ -246,30 +246,30 @@ public: void addSourceLine(DIE &Die, const DINamespace *NS); void addSourceLine(DIE &Die, const DIObjCProperty *Ty); - /// \brief Add constant value entry in variable DIE. + /// Add constant value entry in variable DIE. void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty); void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty); void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty); void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned); void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val); - /// \brief Add constant value entry in variable DIE. + /// Add constant value entry in variable DIE. void addConstantFPValue(DIE &Die, const MachineOperand &MO); void addConstantFPValue(DIE &Die, const ConstantFP *CFP); - /// \brief Add a linkage name, if it isn't empty. + /// Add a linkage name, if it isn't empty. void addLinkageName(DIE &Die, StringRef LinkageName); - /// \brief Add template parameters in buffer. + /// Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DINodeArray TParams); - /// \brief Add register operand. + /// Add register operand. /// \returns false if the register does not exist, e.g., because it was never /// materialized. bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, unsigned SizeInBits = 0, unsigned OffsetInBits = 0); - /// \brief Add register offset. + /// Add register offset. /// \returns false if the register does not exist, e.g., because it was never /// materialized. 
bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); @@ -283,7 +283,7 @@ public: dwarf::Attribute Attribute, const MachineLocation &Location); - /// \brief Add a new type attribute to the specified entity. + /// Add a new type attribute to the specified entity. /// /// This takes and attribute parameter because DW_AT_friend attributes are /// also type references. @@ -297,19 +297,19 @@ public: void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, bool Minimal = false); - /// \brief Find existing DIE or create new DIE for the given type. + /// Find existing DIE or create new DIE for the given type. DIE *getOrCreateTypeDIE(const MDNode *N); - /// \brief Get context owner's DIE. + /// Get context owner's DIE. DIE *createTypeDIE(const DICompositeType *Ty); - /// \brief Get context owner's DIE. + /// Get context owner's DIE. DIE *getOrCreateContextDIE(const DIScope *Context); - /// \brief Construct DIEs for types that contain vtables. + /// Construct DIEs for types that contain vtables. void constructContainingTypeDIEs(); - /// \brief Construct function argument DIEs. + /// Construct function argument DIEs. void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args); /// Create a DIE with the given Tag, add the DIE to its parent, and @@ -332,14 +332,14 @@ public: void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); protected: - /// \brief Create new static data member DIE. + /// Create new static data member DIE. DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); /// Look up the source ID with the given directory and source file names. If /// none currently exists, create a new ID and insert it in the line table. virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; - /// \brief Look in the DwarfDebug map for the MDNode that corresponds to the + /// Look in the DwarfDebug map for the MDNode that corresponds to the /// reference. template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { return DD->resolve(Ref); @@ -358,15 +358,15 @@ private: void constructTemplateValueParameterDIE(DIE &Buffer, const DITemplateValueParameter *TVP); - /// \brief Return the default lower bound for an array. + /// Return the default lower bound for an array. /// /// If the DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; - /// \brief Get an anonymous type for index type. + /// Get an anonymous type for index type. DIE *getIndexTyDie(); - /// \brief Set D as anonymous type for index which can be reused later. + /// Set D as anonymous type for index which can be reused later. void setIndexTyDie(DIE *D) { IndexTyDie = D; } /// If this is a named finished type then include it in the list of types for diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 1be3fd74d602..49ef8d3ddc8f 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -309,7 +309,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, // If some instruction between the previous try-range and the end of the // function may throw, create a call-site entry with no landing pad for the // region following the try-range. 
- if (SawPotentiallyThrowing && !IsSJLJ) { + if (SawPotentiallyThrowing && !IsSJLJ && LastLabel != nullptr) { CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; CallSites.push_back(Site); } diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index 535b1f605853..6610ac78f8c4 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -97,7 +97,7 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol(); Asm->OutStreamer->EmitLabel(MCL); CurFn->Instrs.push_back(MCL); - InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine()); + InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine(), DL.getCol()); } WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) @@ -264,22 +264,38 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { // Identify the function this subsection is for. Asm->OutStreamer->EmitCOFFSecRel32(Fn); Asm->OutStreamer->EmitCOFFSectionIndex(Fn); - // Insert padding after a 16-bit section index. - Asm->EmitInt16(0); + // Insert flags after a 16-bit section index. + Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS); // Length of the function's code, in bytes. EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); // PC-to-linenumber lookup table: MCSymbol *FileSegmentEnd = nullptr; + + // The start of the last segment: + size_t LastSegmentStart = 0; + + auto FinishPreviousChunk = [&] { + if (!FileSegmentEnd) + return; + for (size_t ColSegI = LastSegmentStart, + ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart]; + ColSegI != ColSegEnd; ++ColSegI) { + unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber; + Asm->EmitInt16(ColumnNumber); // Start column + Asm->EmitInt16(ColumnNumber); // End column + } + Asm->OutStreamer->EmitLabel(FileSegmentEnd); + }; + for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) { MCSymbol *Instr = FI.Instrs[J]; assert(InstrInfo.count(Instr)); if (FilenameSegmentLengths.count(J)) { // We came to a beginning of a new filename segment. - if (FileSegmentEnd) - Asm->OutStreamer->EmitLabel(FileSegmentEnd); + FinishPreviousChunk(); StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename; assert(FileNameRegistry.Infos.count(CurFilename)); size_t IndexInStringTable = @@ -300,6 +316,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { // records. FileSegmentEnd = Asm->MMI->getContext().createTempSymbol(); EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd); + LastSegmentStart = J; } // The first PC with the given linenumber and the linenumber itself. 
@@ -307,8 +324,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { Asm->EmitInt32(InstrInfo[Instr].LineNumber); } - if (FileSegmentEnd) - Asm->OutStreamer->EmitLabel(FileSegmentEnd); + FinishPreviousChunk(); Asm->OutStreamer->EmitLabel(LineTableEnd); } diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index a5b399f73707..43d1a432712e 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -52,11 +52,13 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler { struct InstrInfoTy { StringRef Filename; unsigned LineNumber; + unsigned ColumnNumber; - InstrInfoTy() : LineNumber(0) {} + InstrInfoTy() : LineNumber(0), ColumnNumber(0) {} - InstrInfoTy(StringRef Filename, unsigned LineNumber) - : Filename(Filename), LineNumber(LineNumber) {} + InstrInfoTy(StringRef Filename, unsigned LineNumber, unsigned ColumnNumber) + : Filename(Filename), LineNumber(LineNumber), + ColumnNumber(ColumnNumber) {} }; DenseMap<MCSymbol *, InstrInfoTy> InstrInfo; diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index 79830bc3443b..71c77815e281 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -70,19 +70,27 @@ void WinException::beginFunction(const MachineFunction *MF) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned PerEncoding = TLOF.getPersonalityEncoding(); - const Function *Per = MMI->getPersonality(); + const Function *Per = nullptr; + if (F->hasPersonalityFn()) + Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); - shouldEmitPersonality = hasLandingPads && - PerEncoding != dwarf::DW_EH_PE_omit && Per; + bool forceEmitPersonality = + F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + F->needsUnwindTableEntry(); + + shouldEmitPersonality = forceEmitPersonality || (hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per); unsigned LSDAEncoding = TLOF.getLSDAEncoding(); shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - // If we're not using CFI, we don't want the CFI or the personality. Emit the - // LSDA if this is the parent function. + // If we're not using CFI, we don't want the CFI or the personality. If + // WinEHPrepare outlined something, we should emit the LSDA. if (!Asm->MAI->usesWindowsCFI()) { - shouldEmitLSDA = (hasLandingPads && F == ParentF); + bool HasOutlinedChildren = + F->hasFnAttribute("wineh-parent") && F == ParentF; + shouldEmitLSDA = HasOutlinedChildren; shouldEmitPersonality = false; return; } @@ -121,7 +129,10 @@ void WinException::endFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA) return; - EHPersonality Per = MMI->getPersonalityType(); + const Function *F = MF->getFunction(); + EHPersonality Per = EHPersonality::Unknown; + if (F->hasPersonalityFn()) + Per = classifyEHPersonality(F->getPersonalityFn()); // Get rid of any dead landing pads if we're not using a Windows EH scheme. In // Windows EH schemes, the landing pad is not actually reachable. It only @@ -350,6 +361,7 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // EHFlags & 1 -> Synchronous exceptions only, no async exceptions. // EHFlags & 2 -> ??? // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. 
+ OS.EmitValueToAlignment(4); OS.EmitLabel(FuncInfoXData); OS.EmitIntValue(0x19930522, 4); // MagicNumber OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState @@ -555,7 +567,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, // we've code generated the parent, we can emit the label assignment that // those helpers use to get the offset of the registration node. assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX && - "no EH reg node frameescape index"); + "no EH reg node localescape index"); MCSymbol *ParentFrameOffset = Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName); MCSymbol *RegistrationOffsetSym = Asm->OutContext.getOrCreateFrameAllocSymbol( @@ -578,9 +590,11 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // Emit the __ehtable label that we use for llvm.x86.seh.lsda. MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName); + OS.EmitValueToAlignment(4); OS.EmitLabel(LSDALabel); - const Function *Per = MMI->getPersonality(); + const Function *Per = + dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); StringRef PerName = Per->getName(); int BaseState = -1; if (PerName == "_except_handler4") { diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 82f5c482408a..db00910cd018 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -34,4 +34,5 @@ cl::opt<unsigned> cl::Hidden); BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 70de4e7ebd11..6ab6acc03722 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -147,10 +147,13 @@ class TypePromotionTransaction; /// OptSize - True if optimizing for size. bool OptSize; + /// DataLayout for the Function being processed. + const DataLayout *DL; + public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr) { + : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) { initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -203,6 +206,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; + DL = &F.getParent()->getDataLayout(); + bool EverMadeChange = false; // Clear per function information. InsertedInsts.clear(); @@ -753,10 +758,11 @@ static bool SinkCast(CastInst *CI) { /// /// Return true if any changes are made. /// -static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){ +static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, + const DataLayout &DL) { // If this is a noop copy, - EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(CI->getType()); + EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, CI->getType()); // This is an fp<->int conversion? 
if (SrcVT.isInteger() != DstVT.isInteger()) @@ -921,7 +927,7 @@ static bool isExtractBitsCandidateUse(Instruction *User) { static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, - const TargetLowering &TLI) { + const TargetLowering &TLI, const DataLayout &DL) { BasicBlock *UserBB = User->getParent(); DenseMap<BasicBlock *, CastInst *> InsertedTruncs; TruncInst *TruncI = dyn_cast<TruncInst>(User); @@ -947,7 +953,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, // approximation; some nodes' legality is determined by the // operand or other means. There's no good way to find out though. if (TLI.isOperationLegalOrCustom( - ISDOpcode, TLI.getValueType(TruncUser->getType(), true))) + ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true))) continue; // Don't bother for PHI nodes. @@ -1005,13 +1011,14 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, /// instruction. /// Return true if any changes are made. static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, - const TargetLowering &TLI) { + const TargetLowering &TLI, + const DataLayout &DL) { BasicBlock *DefBB = ShiftI->getParent(); /// Only insert instructions in each block once. DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts; - bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType())); + bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType())); bool MadeChange = false; for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); @@ -1048,9 +1055,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, if (isa<TruncInst>(User) && shiftIsLegal // If the type of the truncate is legal, no trucate will be // introduced in other basic blocks. - && (!TLI.isTypeLegal(TLI.getValueType(User->getType())))) + && + (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) MadeChange = - SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI); + SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL); continue; } @@ -1307,12 +1315,10 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return true; } - const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr; - // Align the pointer arguments to this call if the target thinks it's a good // idea unsigned MinSize, PrefAlign; - if (TLI && TD && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { + if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { for (auto &Arg : CI->arg_operands()) { // We want to align both objects whose address is used directly and // objects whose address is used in casts and GEPs, though it only makes @@ -1320,36 +1326,34 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { // if size - offset meets the size threshold. 
if (!Arg->getType()->isPointerTy()) continue; - APInt Offset(TD->getPointerSizeInBits( - cast<PointerType>(Arg->getType())->getAddressSpace()), 0); - Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset); + APInt Offset(DL->getPointerSizeInBits( + cast<PointerType>(Arg->getType())->getAddressSpace()), + 0); + Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); uint64_t Offset2 = Offset.getLimitedValue(); if ((Offset2 & (PrefAlign-1)) != 0) continue; AllocaInst *AI; - if ((AI = dyn_cast<AllocaInst>(Val)) && - AI->getAlignment() < PrefAlign && - TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) + if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && + DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) AI->setAlignment(PrefAlign); // Global variables can only be aligned if they are defined in this // object (i.e. they are uniquely initialized in this object), and // over-aligning global variables that have an explicit section is // forbidden. GlobalVariable *GV; - if ((GV = dyn_cast<GlobalVariable>(Val)) && - GV->hasUniqueInitializer() && - !GV->hasSection() && - GV->getAlignment() < PrefAlign && - TD->getTypeAllocSize( - GV->getType()->getElementType()) >= MinSize + Offset2) + if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->hasUniqueInitializer() && + !GV->hasSection() && GV->getAlignment() < PrefAlign && + DL->getTypeAllocSize(GV->getType()->getElementType()) >= + MinSize + Offset2) GV->setAlignment(PrefAlign); } // If this is a memcpy (or similar) then we may be able to improve the // alignment if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { - unsigned Align = getKnownAlignment(MI->getDest(), *TD); + unsigned Align = getKnownAlignment(MI->getDest(), *DL); if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) - Align = std::min(Align, getKnownAlignment(MTI->getSource(), *TD)); + Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL)); if (Align > MI->getAlignment()) MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); } @@ -2099,6 +2103,7 @@ class AddressingModeMatcher { SmallVectorImpl<Instruction*> &AddrModeInsts; const TargetMachine &TM; const TargetLowering &TLI; + const DataLayout &DL; /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and /// the memory instruction that we're computing this address for. @@ -2131,8 +2136,9 @@ class AddressingModeMatcher { : AddrModeInsts(AMI), TM(TM), TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent()) ->getTargetLowering()), - AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), - InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT) { + DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), + MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), + PromotedInsts(PromotedInsts), TPT(TPT) { IgnoreProfitability = false; } public: @@ -2199,7 +2205,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, TestAddrMode.ScaledReg = ScaleReg; // If the new address isn't legal, bail out. - if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) + if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) return false; // It was legal, so commit it. @@ -2216,7 +2222,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, // If this addressing mode is legal, commit it and remember that we folded // this instruction. 
- if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy, AddrSpace)) { + if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) { AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); AddrMode = TestAddrMode; return true; @@ -2262,7 +2268,8 @@ static bool MightBeFoldableInst(Instruction *I) { /// \note \p Val is assumed to be the product of some type promotion. /// Therefore if \p Val has an undefined state in \p TLI, this is assumed /// to be legal, as the non-promoted value would have had the same state. -static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) { +static bool isPromotedInstructionLegal(const TargetLowering &TLI, + const DataLayout &DL, Value *Val) { Instruction *PromotedInst = dyn_cast<Instruction>(Val); if (!PromotedInst) return false; @@ -2272,7 +2279,7 @@ static bool isPromotedInstructionLegal(const TargetLowering &TLI, Value *Val) { return true; // Otherwise, check if the promoted instruction is legal or not. return TLI.isOperationLegalOrCustom( - ISDOpcode, TLI.getValueType(PromotedInst->getType())); + ISDOpcode, TLI.getValueType(DL, PromotedInst->getType())); } /// \brief Hepler class to perform type promotion. @@ -2646,7 +2653,7 @@ bool AddressingModeMatcher::IsPromotionProfitable( // The promotion is neutral but it may help folding the sign extension in // loads for instance. // Check that we did not create an illegal instruction. - return isPromotedInstructionLegal(TLI, PromotedOperand); + return isPromotedInstructionLegal(TLI, DL, PromotedOperand); } /// MatchOperationAddr - Given an instruction or constant expr, see if we can @@ -2674,12 +2681,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, case Instruction::PtrToInt: // PtrToInt is always a noop, as we know that the int type is pointer sized. return MatchAddr(AddrInst->getOperand(0), Depth); - case Instruction::IntToPtr: + case Instruction::IntToPtr: { + auto AS = AddrInst->getType()->getPointerAddressSpace(); + auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); // This inttoptr is a no-op if the integer type is pointer sized. - if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == - TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace())) + if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) return MatchAddr(AddrInst->getOperand(0), Depth); return false; + } case Instruction::BitCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). @@ -2752,16 +2761,15 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, unsigned VariableScale = 0; int64_t ConstantOffset = 0; - const DataLayout *TD = TLI.getDataLayout(); gep_type_iterator GTI = gep_type_begin(AddrInst); for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { if (StructType *STy = dyn_cast<StructType>(*GTI)) { - const StructLayout *SL = TD->getStructLayout(STy); + const StructLayout *SL = DL.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); ConstantOffset += SL->getElementOffset(Idx); } else { - uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); + uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { ConstantOffset += CI->getSExtValue()*TypeSize; } else if (TypeSize) { // Scales of zero don't do anything. 
@@ -2781,7 +2789,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, if (VariableOperand == -1) { AddrMode.BaseOffs += ConstantOffset; if (ConstantOffset == 0 || - TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) { + TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { // Check to see if we can fold the base pointer in too. if (MatchAddr(AddrInst->getOperand(0), Depth+1)) return true; @@ -2904,14 +2912,14 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { // Fold in immediates if legal for the target. AddrMode.BaseOffs += CI->getSExtValue(); - if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) return true; AddrMode.BaseOffs -= CI->getSExtValue(); } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { // If this is a global variable, try to fold it into the addressing mode. if (!AddrMode.BaseGV) { AddrMode.BaseGV = GV; - if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) return true; AddrMode.BaseGV = nullptr; } @@ -2955,7 +2963,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { AddrMode.HasBaseReg = true; AddrMode.BaseReg = Addr; // Still check for legality in case the target supports [imm] but not [i+r]. - if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) return true; AddrMode.HasBaseReg = false; AddrMode.BaseReg = nullptr; @@ -2965,7 +2973,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (AddrMode.Scale == 0) { AddrMode.Scale = 1; AddrMode.ScaledReg = Addr; - if (TLI.isLegalAddressingMode(AddrMode, AccessTy, AddrSpace)) + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) return true; AddrMode.Scale = 0; AddrMode.ScaledReg = nullptr; @@ -2984,7 +2992,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering(); const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = - TLI->ParseConstraints(TRI, ImmutableCallSite(CI)); + TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI, + ImmutableCallSite(CI)); for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; @@ -3324,7 +3333,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); - Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); Value *ResultPtr = nullptr, *ResultIndex = nullptr; // First, find the pointer. @@ -3443,7 +3452,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, } else { DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); - Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); Value *Result = nullptr; // Start with the base register. 
Do this first so that subsequent address @@ -3545,8 +3554,8 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { const TargetRegisterInfo *TRI = TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); - TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI->ParseConstraints(TRI, CS); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI->ParseConstraints(*DL, TRI, CS); unsigned ArgNo = 0; for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; @@ -3680,7 +3689,7 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, TotalCreatedInstsCost -= ExtCost; if (!StressExtLdPromotion && (TotalCreatedInstsCost > 1 || - !isPromotedInstructionLegal(*TLI, PromotedVal))) { + !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) { // The promotion is not profitable, rollback to the previous state. TPT.rollback(LastKnownGood); continue; @@ -3735,8 +3744,8 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { if (!HasPromoted && LI->getParent() == I->getParent()) return false; - EVT VT = TLI->getValueType(I->getType()); - EVT LoadVT = TLI->getValueType(LI->getType()); + EVT VT = TLI->getValueType(*DL, I->getType()); + EVT LoadVT = TLI->getValueType(*DL, LI->getType()); // If the load has other users and the truncate is not free, this probably // isn't worthwhile. @@ -4013,6 +4022,9 @@ namespace { /// Assuming both extractelement and store can be combine, we get rid of the /// transition. class VectorPromoteHelper { + /// DataLayout associated with the current module. + const DataLayout &DL; + /// Used to perform some checks on the legality of vector operations. const TargetLowering &TLI; @@ -4086,7 +4098,8 @@ class VectorPromoteHelper { unsigned Align = ST->getAlignment(); // Check if this store is supported. if (!TLI.allowsMisalignedMemoryAccesses( - TLI.getValueType(ST->getValueOperand()->getType()), AS, Align)) { + TLI.getValueType(DL, ST->getValueOperand()->getType()), AS, + Align)) { // If this is not supported, there is no way we can combine // the extract with the store. return false; @@ -4181,9 +4194,10 @@ class VectorPromoteHelper { } public: - VectorPromoteHelper(const TargetLowering &TLI, const TargetTransformInfo &TTI, - Instruction *Transition, unsigned CombineCost) - : TLI(TLI), TTI(TTI), Transition(Transition), + VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI, + const TargetTransformInfo &TTI, Instruction *Transition, + unsigned CombineCost) + : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition), StoreExtractCombineCost(CombineCost), CombineInst(nullptr) { assert(Transition && "Do not know how to promote null"); } @@ -4219,7 +4233,7 @@ public: return false; return StressStoreExtract || TLI.isOperationLegalOrCustom( - ISDOpcode, TLI.getValueType(getTransitionType(), true)); + ISDOpcode, TLI.getValueType(DL, getTransitionType(), true)); } /// \brief Check whether or not \p Use can be combined @@ -4323,7 +4337,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { // we do not do that for now. BasicBlock *Parent = Inst->getParent(); DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); - VectorPromoteHelper VPH(*TLI, *TTI, Inst, CombineCost); + VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost); // If the transition has more than one use, assume this is not going to be // beneficial. 
while (Inst->hasOneUse()) { @@ -4368,8 +4382,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { // It is possible for very late stage optimizations (such as SimplifyCFG) // to introduce PHI nodes too late to be cleaned up. If we detect such a // trivial PHI, go ahead and zap it here. - const DataLayout &DL = I->getModule()->getDataLayout(); - if (Value *V = SimplifyInstruction(P, DL, TLInfo, nullptr)) { + if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) { P->replaceAllUsesWith(V); P->eraseFromParent(); ++NumPHIsElim; @@ -4388,15 +4401,16 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { if (isa<Constant>(CI->getOperand(0))) return false; - if (TLI && OptimizeNoopCopyExpression(CI, *TLI)) + if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL)) return true; if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { /// Sink a zext or sext into its user blocks if the target type doesn't /// fit in one register - if (TLI && TLI->getTypeAction(CI->getContext(), - TLI->getValueType(CI->getType())) == - TargetLowering::TypeExpandInteger) { + if (TLI && + TLI->getTypeAction(CI->getContext(), + TLI->getValueType(*DL, CI->getType())) == + TargetLowering::TypeExpandInteger) { return SinkCast(CI); } else { bool MadeChange = MoveExtToFormExtLoad(I); @@ -4433,7 +4447,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { BinOp->getOpcode() == Instruction::LShr)) { ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); if (TLI && CI && TLI->hasExtractBitsInsn()) - return OptimizeExtractBits(BinOp, CI, *TLI); + return OptimizeExtractBits(BinOp, CI, *TLI, *DL); return false; } diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 963d573ea7f0..941129b5cc95 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -60,7 +60,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { return false; // Don't delete frame allocation labels. - if (MI->getOpcode() == TargetOpcode::FRAME_ALLOC) + if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE) return false; // Don't delete instructions with side effects. diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 5b09cf1a0fd7..201f9c150083 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -733,12 +733,14 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // If no relevant registers are used in the function, we can skip it // completely. bool anyregs = false; + const MachineRegisterInfo &MRI = mf.getRegInfo(); for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); - I != E; ++I) - if (MF->getRegInfo().isPhysRegUsed(*I)) { - anyregs = true; - break; - } + I != E && !anyregs; ++I) + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) + if (!MRI.reg_nodbg_empty(*AI)) { + anyregs = true; + break; + } if (!anyregs) return false; // Initialize the AliasMap on the first use. 
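The ExecutionDepsFix hunk above drops a use of MachineRegisterInfo::isPhysRegUsed: instead of asking whether a physical register was marked used, the pass now walks each register of the relevant class plus its aliases and checks whether any non-debug def or use exists. The following is only a self-contained sketch of that query, using the same MachineRegisterInfo/TargetRegisterInfo interfaces that appear in the hunk; the helper name is illustrative and not part of the patch:

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Return true if any register of class RC (or a register aliasing one of
    // them) has a non-debug def or use in the current function.
    static bool anyRegOfClassReferenced(const TargetRegisterClass *RC,
                                        const MachineRegisterInfo &MRI,
                                        const TargetRegisterInfo *TRI) {
      for (unsigned Reg : *RC)
        for (MCRegAliasIterator AI(Reg, TRI, /*IncludeSelf=*/true); AI.isValid(); ++AI)
          if (!MRI.reg_nodbg_empty(*AI))
            return true;
      return false;
    }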
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 37b3bf17ed1f..6f9e8394081e 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -117,7 +117,6 @@ STATISTIC(NumMerged, "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { const TargetMachine *TM; - const DataLayout *DL; // FIXME: Infer the maximum possible offset depending on the actual users // (these max offsets are different for the users inside Thumb or ARM // functions), see the code that passes in the offset in the ARM backend @@ -160,8 +159,8 @@ namespace { explicit GlobalMerge(const TargetMachine *TM = nullptr, unsigned MaximalOffset = 0, bool OnlyOptimizeForSize = false) - : FunctionPass(ID), TM(TM), DL(TM->getDataLayout()), - MaxOffset(MaximalOffset), OnlyOptimizeForSize(OnlyOptimizeForSize) { + : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset), + OnlyOptimizeForSize(OnlyOptimizeForSize) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -188,14 +187,16 @@ INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables", bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const { + auto &DL = M.getDataLayout(); // FIXME: Find better heuristics - std::stable_sort(Globals.begin(), Globals.end(), - [this](const GlobalVariable *GV1, const GlobalVariable *GV2) { - Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); - Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); + std::stable_sort( + Globals.begin(), Globals.end(), + [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType(); + Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType(); - return (DL->getTypeAllocSize(Ty1) < DL->getTypeAllocSize(Ty2)); - }); + return (DL.getTypeAllocSize(Ty1) < DL.getTypeAllocSize(Ty2)); + }); // If we want to just blindly group all globals together, do so. if (!GlobalMergeGroupByUse) { @@ -410,6 +411,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals, unsigned AddrSpace) const { Type *Int32Ty = Type::getInt32Ty(M.getContext()); + auto &DL = M.getDataLayout(); assert(Globals.size() > 1); @@ -427,7 +429,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable *> &Globals, GlobalVariable *TheFirstExternal = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { Type *Ty = Globals[j]->getType()->getElementType(); - MergedSize += DL->getTypeAllocSize(Ty); + MergedSize += DL.getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { break; } @@ -526,6 +528,7 @@ bool GlobalMerge::doInitialization(Module &M) { if (!EnableGlobalMerge) return false; + auto &DL = M.getDataLayout(); DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, BSSGlobals; bool Changed = false; @@ -548,9 +551,9 @@ bool GlobalMerge::doInitialization(Module &M) { unsigned AddressSpace = PT->getAddressSpace(); // Ignore fancy-aligned globals for now. - unsigned Alignment = DL->getPreferredAlignment(I); + unsigned Alignment = DL.getPreferredAlignment(I); Type *Ty = I->getType()->getElementType(); - if (Alignment > DL->getABITypeAlignment(Ty)) + if (Alignment > DL.getABITypeAlignment(Ty)) continue; // Ignore all 'special' globals. 
@@ -562,7 +565,7 @@ bool GlobalMerge::doInitialization(Module &M) { if (isMustKeepGlobalVariable(I)) continue; - if (DL->getTypeAllocSize(Ty) < MaxOffset) { + if (DL.getTypeAllocSize(Ty) < MaxOffset) { if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal()) BSSGlobals[AddressSpace].push_back(I); else if (I->isConstant()) diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index a02cd67ac649..93e04876a8f3 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -25,9 +25,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -47,6 +50,11 @@ static cl::opt<unsigned> PageSize("imp-null-check-page-size", "bytes"), cl::init(4096)); +#define DEBUG_TYPE "implicit-null-checks" + +STATISTIC(NumImplicitNullChecks, + "Number of explicit null checks made implicit"); + namespace { class ImplicitNullChecks : public MachineFunctionPass { @@ -171,6 +179,9 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // callq throw_NullPointerException // // LblNotNull: + // Inst0 + // Inst1 + // ... // Def = Load (%RAX + <offset>) // ... // @@ -181,6 +192,8 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // jmp LblNotNull ;; explicit or fallthrough // // LblNotNull: + // Inst0 + // Inst1 // ... // // LblNull: @@ -188,15 +201,75 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // unsigned PointerReg = MBP.LHS.getReg(); - MachineInstr *MemOp = &*NotNullSucc->begin(); - unsigned BaseReg, Offset; - if (TII->getMemOpBaseRegImmOfs(MemOp, BaseReg, Offset, TRI)) - if (MemOp->mayLoad() && !MemOp->isPredicable() && BaseReg == PointerReg && - Offset < PageSize && MemOp->getDesc().getNumDefs() == 1) { - NullCheckList.emplace_back(MemOp, MBP.ConditionDef, &MBB, NotNullSucc, - NullSucc); - return true; + + // As we scan NotNullSucc for a suitable load instruction, we keep track of + // the registers defined and used by the instructions we scan past. This bit + // of information lets us decide if it is legal to hoist the load instruction + // we find (if we do find such an instruction) to before NotNullSucc. + DenseSet<unsigned> RegDefs, RegUses; + + // Returns true if it is safe to reorder MI to before NotNullSucc. + auto IsSafeToHoist = [&](MachineInstr *MI) { + // Right now we don't want to worry about LLVM's memory model. This can be + // made more precise later. 
+ for (auto *MMO : MI->memoperands()) + if (!MMO->isUnordered()) + return false; + + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.getReg()) { + for (unsigned Reg : RegDefs) + if (TRI->regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-write or read-after-write + + if (MO.isDef()) + for (unsigned Reg : RegUses) + if (TRI->regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-read + } + } + + return true; + }; + + for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE; + ++MII) { + MachineInstr *MI = &*MII; + unsigned BaseReg, Offset; + if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg && + Offset < PageSize && MI->getDesc().getNumDefs() == 1 && + IsSafeToHoist(MI)) { + NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc, + NullSucc); + return true; + } + + // MI did not match our criteria for conversion to a trapping load. Check + // if we can continue looking. + + if (MI->mayStore() || MI->hasUnmodeledSideEffects()) + return false; + + for (auto *MMO : MI->memoperands()) + // Right now we don't want to worry about LLVM's memory model. + if (!MMO->isUnordered()) + return false; + + // It _may_ be okay to reorder a later load instruction across MI. Make a + // note of its operands so that we can make the legality check if we find a + // suitable load instruction: + + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.getReg()) + continue; + + if (MO.isDef()) + RegDefs.insert(MO.getReg()); + else + RegUses.insert(MO.getReg()); } + } return false; } @@ -247,7 +320,7 @@ void ImplicitNullChecks::rewriteNullChecks( // touch the successors list for any basic block since we haven't changed // control flow, we've just made it implicit. insertFaultingLoad(NC.MemOperation, NC.CheckBlock, HandlerLabel); - NC.MemOperation->removeFromParent(); + NC.MemOperation->eraseFromParent(); NC.CheckOperation->eraseFromParent(); // Insert an *unconditional* branch to not-null successor. @@ -257,6 +330,8 @@ void ImplicitNullChecks::rewriteNullChecks( // Emit the HandlerLabel as an EH_LABEL. BuildMI(*NC.NullSucc, NC.NullSucc->begin(), DL, TII->get(TargetOpcode::EH_LABEL)).addSym(HandlerLabel); + + NumImplicitNullChecks++; } } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index b486bdc91453..37299eb664cf 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -90,8 +90,8 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { /// addPassesToX helper drives creation and initialization of TargetPassConfig. static MCContext * addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, - bool DisableVerify, AnalysisID StartAfter, - AnalysisID StopAfter, + bool DisableVerify, AnalysisID StartBefore, + AnalysisID StartAfter, AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer = nullptr) { // Add internal analysis passes from the target machine. @@ -100,7 +100,7 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, // Targets may override createPassConfig to provide a target-specific // subclass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); - PassConfig->setStartStopPasses(StartAfter, StopAfter); + PassConfig->setStartStopPasses(StartBefore, StartAfter, StopAfter); // Set PassConfig options provided by TargetMachine. 
PassConfig->setDisableVerify(DisableVerify); @@ -143,11 +143,12 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, bool LLVMTargetMachine::addPassesToEmitFile( PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, - bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter, - MachineFunctionInitializer *MFInitializer) { + bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) { // Add common CodeGen passes. - MCContext *Context = addPassesToGenerateCode( - this, PM, DisableVerify, StartAfter, StopAfter, MFInitializer); + MCContext *Context = + addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, + StopAfter, MFInitializer); if (!Context) return true; @@ -231,7 +232,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, raw_pwrite_stream &Out, bool DisableVerify) { // Add common CodeGen passes. - Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr); + Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr, + nullptr); if (!Ctx) return true; diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 154ce6fc122b..000151acd735 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -15,12 +15,12 @@ #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -49,7 +49,6 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); LIS = &getAnalysis<LiveIntervals>(); VRM = &getAnalysis<VirtRegMap>(); @@ -101,7 +100,6 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { << " to " << PrintReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { @@ -131,6 +129,14 @@ void LiveRegMatrix::unassign(LiveInterval &VirtReg) { DEBUG(dbgs() << '\n'); } +bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { + for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { + if (!Matrix[*Unit].empty()) + return true; + } + return false; +} + bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg) { // Check if the cached information is valid. 
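The LLVMTargetMachine changes above thread a new StartBefore pass ID through addPassesToGenerateCode and addPassesToEmitFile alongside the existing StartAfter/StopAfter IDs, so a codegen pipeline can be started at a given pass rather than only after it. The gating itself lives in TargetPassConfig and is not shown in this diff; below is only a hypothetical sketch of how such a StartBefore/StartAfter/StopAfter triple can be evaluated, with all names illustrative:

    using AnalysisID = const void *;        // pass identity, as in llvm::AnalysisID

    struct StartStopGate {
      AnalysisID StartBefore, StartAfter, StopAfter;
      bool Started, Stopped;

      StartStopGate(AnalysisID SB, AnalysisID SA, AnalysisID Stop)
          : StartBefore(SB), StartAfter(SA), StopAfter(Stop),
            Started(!SB && !SA), Stopped(false) {}

      // Decide whether the pass identified by ID should be added.
      bool shouldAddPass(AnalysisID ID) {
        if (StartBefore && ID == StartBefore)
          Started = true;                   // start *with* this pass
        bool Add = Started && !Stopped;
        if (StartAfter && ID == StartAfter)
          Started = true;                   // start with the *next* pass
        if (StopAfter && ID == StopAfter)
          Stopped = true;                   // this pass still runs, later ones do not
        return Add;
      }
    };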
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index e9b3916a11fa..482c33ae2235 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MILexer.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include <cctype> @@ -64,6 +65,17 @@ static bool isIdentifierChar(char C) { return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.'; } +static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { + return StringSwitch<MIToken::TokenKind>(Identifier) + .Case("_", MIToken::underscore) + .Case("implicit", MIToken::kw_implicit) + .Case("implicit-def", MIToken::kw_implicit_define) + .Case("dead", MIToken::kw_dead) + .Case("killed", MIToken::kw_killed) + .Case("undef", MIToken::kw_undef) + .Default(MIToken::Identifier); +} + static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { if (!isalpha(C.peek()) && C.peek() != '_') return None; @@ -71,8 +83,7 @@ static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { while (isIdentifierChar(C.peek())) C.advance(); auto Identifier = Range.upto(C); - Token = MIToken(Identifier == "_" ? MIToken::underscore : MIToken::Identifier, - Identifier); + Token = MIToken(getIdentifierKind(Identifier), Identifier); return C; } @@ -104,9 +115,22 @@ static Cursor maybeLexMachineBasicBlock( return C; } +static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { + auto Range = C; + C.advance(); // Skip '%' + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token = MIToken(MIToken::VirtualRegister, Range.upto(C), + APSInt(NumberRange.upto(C))); + return C; +} + static Cursor maybeLexRegister(Cursor C, MIToken &Token) { if (C.peek() != '%') return None; + if (isdigit(C.peek(1))) + return lexVirtualRegister(C, Token); auto Range = C; C.advance(); // Skip '%' while (isIdentifierChar(C.peek())) @@ -155,6 +179,8 @@ static MIToken::TokenKind symbolToken(char C) { return MIToken::comma; case '=': return MIToken::equal; + case ':': + return MIToken::colon; default: return MIToken::Error; } diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index c28935f38909..55460b56e7d6 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -35,6 +35,14 @@ struct MIToken { comma, equal, underscore, + colon, + + // Keywords + kw_implicit, + kw_implicit_define, + kw_dead, + kw_killed, + kw_undef, // Identifier tokens Identifier, @@ -44,7 +52,8 @@ struct MIToken { GlobalValue, // Other tokens - IntegerLiteral + IntegerLiteral, + VirtualRegister }; private: @@ -66,7 +75,13 @@ public: bool isError() const { return Kind == Error; } bool isRegister() const { - return Kind == NamedRegister || Kind == underscore; + return Kind == NamedRegister || Kind == underscore || + Kind == VirtualRegister; + } + + bool isRegisterFlag() const { + return Kind == kw_implicit || Kind == kw_implicit_define || + Kind == kw_dead || Kind == kw_killed || Kind == kw_undef; } bool is(TokenKind K) const { return Kind == K; } @@ -81,7 +96,7 @@ public: bool hasIntegerValue() const { return Kind == IntegerLiteral || Kind == MachineBasicBlock || - Kind == GlobalValue; + Kind == GlobalValue || Kind == VirtualRegister; } }; diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index b618e53b8e43..c00011288a60 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -18,6 +18,7 @@ 
#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" @@ -28,14 +29,25 @@ using namespace llvm; namespace { +/// A wrapper struct around the 'MachineOperand' struct that includes a source +/// range. +struct MachineOperandWithLocation { + MachineOperand Operand; + StringRef::iterator Begin; + StringRef::iterator End; + + MachineOperandWithLocation(const MachineOperand &Operand, + StringRef::iterator Begin, StringRef::iterator End) + : Operand(Operand), Begin(Begin), End(End) {} +}; + class MIParser { SourceMgr &SM; MachineFunction &MF; SMDiagnostic &Error; StringRef Source, CurrentSource; MIToken Token; - /// Maps from basic block numbers to MBBs. - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots; + const PerFunctionMIParsingState &PFS; /// Maps from indices to unnamed global values and metadata nodes. const SlotMapping &IRSlots; /// Maps from instruction names to op codes. @@ -44,11 +56,12 @@ class MIParser { StringMap<unsigned> Names2Regs; /// Maps from register mask names to register masks. StringMap<const uint32_t *> Names2RegMasks; + /// Maps from subregister names to subregister indices. + StringMap<unsigned> Names2SubRegIndices; public: MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, - StringRef Source, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, + StringRef Source, const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots); void lex(); @@ -65,8 +78,11 @@ public: bool parse(MachineInstr *&MI); bool parseMBB(MachineBasicBlock *&MBB); + bool parseNamedRegister(unsigned &Reg); bool parseRegister(unsigned &Reg); + bool parseRegisterFlag(unsigned &Flags); + bool parseSubRegisterIndex(unsigned &SubReg); bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false); bool parseImmediateOperand(MachineOperand &Dest); bool parseMBBReference(MachineBasicBlock *&MBB); @@ -88,6 +104,9 @@ private: bool parseInstruction(unsigned &OpCode); + bool verifyImplicitOperands(ArrayRef<MachineOperandWithLocation> Operands, + const MCInstrDesc &MCID); + void initNames2Regs(); /// Try to convert a register name to a register number. Return true if the @@ -100,17 +119,22 @@ private: /// /// Return null if the identifier isn't a register mask. const uint32_t *getRegMask(StringRef Identifier); + + void initNames2SubRegIndices(); + + /// Check if the given identifier is a name of a subregister index. + /// + /// Return 0 if the name isn't a subregister index class. + unsigned getSubRegIndex(StringRef Name); }; } // end anonymous namespace MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, - StringRef Source, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, + StringRef Source, const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots) : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), - Token(MIToken::Error, StringRef()), MBBSlots(MBBSlots), IRSlots(IRSlots) { -} + Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {} void MIParser::lex() { CurrentSource = lexMIToken( @@ -121,8 +145,6 @@ void MIParser::lex() { bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { - // TODO: Get the proper location in the MIR file, not just a location inside - // the string. 
assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); Error = SMDiagnostic( SM, SMLoc(), @@ -137,11 +159,12 @@ bool MIParser::parse(MachineInstr *&MI) { // Parse any register operands before '=' // TODO: Allow parsing of multiple operands before '=' MachineOperand MO = MachineOperand::CreateImm(0); - SmallVector<MachineOperand, 8> Operands; - if (Token.isRegister()) { + SmallVector<MachineOperandWithLocation, 8> Operands; + if (Token.isRegister() || Token.isRegisterFlag()) { + auto Loc = Token.location(); if (parseRegisterOperand(MO, /*IsDef=*/true)) return true; - Operands.push_back(MO); + Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); if (Token.isNot(MIToken::equal)) return error("expected '='"); lex(); @@ -155,9 +178,10 @@ bool MIParser::parse(MachineInstr *&MI) { // Parse the remaining machine operands. while (Token.isNot(MIToken::Eof)) { + auto Loc = Token.location(); if (parseMachineOperand(MO)) return true; - Operands.push_back(MO); + Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); if (Token.is(MIToken::Eof)) break; if (Token.isNot(MIToken::comma)) @@ -166,25 +190,16 @@ bool MIParser::parse(MachineInstr *&MI) { } const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); - - // Verify machine operands. if (!MCID.isVariadic()) { - for (size_t I = 0, E = Operands.size(); I < E; ++I) { - if (I < MCID.getNumOperands()) - continue; - // Mark this register as implicit to prevent an assertion when it's added - // to an instruction. This is a temporary workaround until the implicit - // register flag can be parsed. - if (Operands[I].isReg()) - Operands[I].setImplicit(); - } + // FIXME: Move the implicit operand verification to the machine verifier. + if (verifyImplicitOperands(Operands, MCID)) + return true; } - // TODO: Determine the implicit behaviour when implicit register flags are - // parsed. + // TODO: Check for extraneous machine operands. MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true); for (const auto &Operand : Operands) - MI->addOperand(MF, Operand); + MI->addOperand(MF, Operand.Operand); return false; } @@ -201,6 +216,80 @@ bool MIParser::parseMBB(MachineBasicBlock *&MBB) { return false; } +bool MIParser::parseNamedRegister(unsigned &Reg) { + lex(); + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + if (parseRegister(Reg)) + return 0; + lex(); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the register reference"); + return false; +} + +static const char *printImplicitRegisterFlag(const MachineOperand &MO) { + assert(MO.isImplicit()); + return MO.isDef() ? "implicit-def" : "implicit"; +} + +static std::string getRegisterName(const TargetRegisterInfo *TRI, + unsigned Reg) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg"); + return StringRef(TRI->getName(Reg)).lower(); +} + +bool MIParser::verifyImplicitOperands( + ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) { + if (MCID.isCall()) + // We can't verify call instructions as they can contain arbitrary implicit + // register and register mask operands. + return false; + + // Gather all the expected implicit operands. 
+ SmallVector<MachineOperand, 4> ImplicitOperands; + if (MCID.ImplicitDefs) + for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) + ImplicitOperands.push_back( + MachineOperand::CreateReg(*ImpDefs, true, true)); + if (MCID.ImplicitUses) + for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) + ImplicitOperands.push_back( + MachineOperand::CreateReg(*ImpUses, false, true)); + + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + size_t I = ImplicitOperands.size(), J = Operands.size(); + while (I) { + --I; + if (J) { + --J; + const auto &ImplicitOperand = ImplicitOperands[I]; + const auto &Operand = Operands[J].Operand; + if (ImplicitOperand.isIdenticalTo(Operand)) + continue; + if (Operand.isReg() && Operand.isImplicit()) { + return error(Operands[J].Begin, + Twine("expected an implicit register operand '") + + printImplicitRegisterFlag(ImplicitOperand) + " %" + + getRegisterName(TRI, ImplicitOperand.getReg()) + "'"); + } + } + // TODO: Fix source location when Operands[J].end is right before '=', i.e: + // insead of reporting an error at this location: + // %eax = MOV32r0 + // ^ + // report the error at the following location: + // %eax = MOV32r0 + // ^ + return error(J < Operands.size() ? Operands[J].End : Token.location(), + Twine("missing implicit register operand '") + + printImplicitRegisterFlag(ImplicitOperands[I]) + " %" + + getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'"); + } + return false; +} + bool MIParser::parseInstruction(unsigned &OpCode) { if (Token.isNot(MIToken::Identifier)) return error("expected a machine instruction"); @@ -222,6 +311,17 @@ bool MIParser::parseRegister(unsigned &Reg) { return error(Twine("unknown register name '") + Name + "'"); break; } + case MIToken::VirtualRegister: { + unsigned ID; + if (getUnsigned(ID)) + return true; + const auto RegInfo = PFS.VirtualRegisterSlots.find(ID); + if (RegInfo == PFS.VirtualRegisterSlots.end()) + return error(Twine("use of undefined virtual register '%") + Twine(ID) + + "'"); + Reg = RegInfo->second; + break; + } // TODO: Parse other register kinds. default: llvm_unreachable("The current token should be a register"); @@ -229,14 +329,66 @@ bool MIParser::parseRegister(unsigned &Reg) { return false; } +bool MIParser::parseRegisterFlag(unsigned &Flags) { + switch (Token.kind()) { + case MIToken::kw_implicit: + Flags |= RegState::Implicit; + break; + case MIToken::kw_implicit_define: + Flags |= RegState::ImplicitDefine; + break; + case MIToken::kw_dead: + Flags |= RegState::Dead; + break; + case MIToken::kw_killed: + Flags |= RegState::Kill; + break; + case MIToken::kw_undef: + Flags |= RegState::Undef; + break; + // TODO: report an error when we specify the same flag more than once. + // TODO: parse the other register flags. + default: + llvm_unreachable("The current token should be a register flag"); + } + lex(); + return false; +} + +bool MIParser::parseSubRegisterIndex(unsigned &SubReg) { + assert(Token.is(MIToken::colon)); + lex(); + if (Token.isNot(MIToken::Identifier)) + return error("expected a subregister index after ':'"); + auto Name = Token.stringValue(); + SubReg = getSubRegIndex(Name); + if (!SubReg) + return error(Twine("use of unknown subregister index '") + Name + "'"); + lex(); + return false; +} + bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) { unsigned Reg; - // TODO: Parse register flags. + unsigned Flags = IsDef ? 
RegState::Define : 0; + while (Token.isRegisterFlag()) { + if (parseRegisterFlag(Flags)) + return true; + } + if (!Token.isRegister()) + return error("expected a register after register flags"); if (parseRegister(Reg)) return true; lex(); - // TODO: Parse subregister. - Dest = MachineOperand::CreateReg(Reg, IsDef); + unsigned SubReg = 0; + if (Token.is(MIToken::colon)) { + if (parseSubRegisterIndex(SubReg)) + return true; + } + Dest = MachineOperand::CreateReg( + Reg, Flags & RegState::Define, Flags & RegState::Implicit, + Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef, + /*isEarlyClobber=*/false, SubReg); return false; } @@ -266,8 +418,8 @@ bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { unsigned Number; if (getUnsigned(Number)) return true; - auto MBBInfo = MBBSlots.find(Number); - if (MBBInfo == MBBSlots.end()) + auto MBBInfo = PFS.MBBSlots.find(Number); + if (MBBInfo == PFS.MBBSlots.end()) return error(Twine("use of undefined machine basic block #") + Twine(Number)); MBB = MBBInfo->second; @@ -318,8 +470,14 @@ bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { bool MIParser::parseMachineOperand(MachineOperand &Dest) { switch (Token.kind()) { + case MIToken::kw_implicit: + case MIToken::kw_implicit_define: + case MIToken::kw_dead: + case MIToken::kw_killed: + case MIToken::kw_undef: case MIToken::underscore: case MIToken::NamedRegister: + case MIToken::VirtualRegister: return parseRegisterOperand(Dest); case MIToken::IntegerLiteral: return parseImmediateOperand(Dest); @@ -408,16 +566,41 @@ const uint32_t *MIParser::getRegMask(StringRef Identifier) { return RegMaskInfo->getValue(); } -bool llvm::parseMachineInstr( - MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF, StringRef Src, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, - const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, MBBSlots, IRSlots).parse(MI); +void MIParser::initNames2SubRegIndices() { + if (!Names2SubRegIndices.empty()) + return; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I) + Names2SubRegIndices.insert( + std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I)); +} + +unsigned MIParser::getSubRegIndex(StringRef Name) { + initNames2SubRegIndices(); + auto SubRegInfo = Names2SubRegIndices.find(Name); + if (SubRegInfo == Names2SubRegIndices.end()) + return 0; + return SubRegInfo->getValue(); +} + +bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI); +} + +bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB); } -bool llvm::parseMBBReference( - MachineBasicBlock *&MBB, SourceMgr &SM, MachineFunction &MF, StringRef Src, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, - const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, MBBSlots, IRSlots).parseMBB(MBB); +bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + return 
MIParser(SM, MF, Error, Src, PFS, IRSlots).parseNamedRegister(Reg); } diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h index 4d6d4e700217..fca4c4e6f885 100644 --- a/lib/CodeGen/MIRParser/MIParser.h +++ b/lib/CodeGen/MIRParser/MIParser.h @@ -26,16 +26,26 @@ struct SlotMapping; class SMDiagnostic; class SourceMgr; +struct PerFunctionMIParsingState { + DenseMap<unsigned, MachineBasicBlock *> MBBSlots; + DenseMap<unsigned, unsigned> VirtualRegisterSlots; +}; + bool parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF, - StringRef Src, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, + StringRef Src, const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots, SMDiagnostic &Error); bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, MachineFunction &MF, StringRef Src, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots, + const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots, SMDiagnostic &Error); +bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error); + } // end namespace llvm #endif diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index 397458300782..16b0e1655891 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -21,6 +21,7 @@ #include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/IR/BasicBlock.h" @@ -48,6 +49,8 @@ class MIRParserImpl { LLVMContext &Context; StringMap<std::unique_ptr<yaml::MachineFunction>> Functions; SlotMapping IRSlots; + /// Maps from register class names to register classes. + StringMap<const TargetRegisterClass *> Names2RegClasses; public: MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, @@ -60,6 +63,11 @@ public: /// Always returns true. bool error(const Twine &Message); + /// Report an error with the given message at the given location. + /// + /// Always returns true. + bool error(SMLoc Loc, const Twine &Message); + /// Report a given error with the location translated from the location in an /// embedded string literal to a location in the MIR file. /// @@ -90,13 +98,18 @@ public: /// Initialize the machine basic block using it's YAML representation. /// /// Return true if an error occurred. - bool initializeMachineBasicBlock( - MachineFunction &MF, MachineBasicBlock &MBB, - const yaml::MachineBasicBlock &YamlMBB, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots); + bool initializeMachineBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, + const yaml::MachineBasicBlock &YamlMBB, + const PerFunctionMIParsingState &PFS); + + bool + initializeRegisterInfo(const MachineFunction &MF, + MachineRegisterInfo &RegInfo, + const yaml::MachineFunction &YamlMF, + DenseMap<unsigned, unsigned> &VirtualRegisterSlots); - bool initializeRegisterInfo(MachineRegisterInfo &RegInfo, - const yaml::MachineFunction &YamlMF); + bool initializeFrameInfo(MachineFrameInfo &MFI, + const yaml::MachineFunction &YamlMF); private: /// Return a MIR diagnostic converted from an MI string diagnostic. @@ -109,6 +122,14 @@ private: /// Create an empty function with the given name. 
void createDummyFunction(StringRef Name, Module &M); + + void initNames2RegClasses(const MachineFunction &MF); + + /// Check if the given identifier is a name of a register class. + /// + /// Return null if the name isn't a register class. + const TargetRegisterClass *getRegClass(const MachineFunction &MF, + StringRef Name); }; } // end namespace llvm @@ -125,6 +146,12 @@ bool MIRParserImpl::error(const Twine &Message) { return true; } +bool MIRParserImpl::error(SMLoc Loc, const Twine &Message) { + Context.diagnose(DiagnosticInfoMIRParser( + DS_Error, SM.GetMessage(Loc, SourceMgr::DK_Error, Message))); + return true; +} + bool MIRParserImpl::error(const SMDiagnostic &Error, SMRange SourceRange) { assert(Error.getKind() == SourceMgr::DK_Error && "Expected an error"); reportDiagnostic(diagFromMIStringDiag(Error, SourceRange)); @@ -233,34 +260,44 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { MF.setAlignment(YamlMF.Alignment); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); MF.setHasInlineAsm(YamlMF.HasInlineAsm); - if (initializeRegisterInfo(MF.getRegInfo(), YamlMF)) + PerFunctionMIParsingState PFS; + if (initializeRegisterInfo(MF, MF.getRegInfo(), YamlMF, + PFS.VirtualRegisterSlots)) + return true; + if (initializeFrameInfo(*MF.getFrameInfo(), YamlMF)) return true; const auto &F = *MF.getFunction(); - DenseMap<unsigned, MachineBasicBlock *> MBBSlots; for (const auto &YamlMBB : YamlMF.BasicBlocks) { const BasicBlock *BB = nullptr; - if (!YamlMBB.Name.empty()) { + const yaml::StringValue &Name = YamlMBB.Name; + if (!Name.Value.empty()) { BB = dyn_cast_or_null<BasicBlock>( - F.getValueSymbolTable().lookup(YamlMBB.Name)); + F.getValueSymbolTable().lookup(Name.Value)); if (!BB) - return error(Twine("basic block '") + YamlMBB.Name + - "' is not defined in the function '" + MF.getName() + "'"); + return error(Name.SourceRange.Start, + Twine("basic block '") + Name.Value + + "' is not defined in the function '" + MF.getName() + + "'"); } auto *MBB = MF.CreateMachineBasicBlock(BB); MF.insert(MF.end(), MBB); - bool WasInserted = MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second; + bool WasInserted = + PFS.MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second; if (!WasInserted) return error(Twine("redefinition of machine basic block with id #") + Twine(YamlMBB.ID)); } + if (YamlMF.BasicBlocks.empty()) + return error(Twine("machine function '") + Twine(MF.getName()) + + "' requires at least one machine basic block in its body"); // Initialize the machine basic blocks after creating them all so that the // machine instructions parser can resolve the MBB references. unsigned I = 0; for (const auto &YamlMBB : YamlMF.BasicBlocks) { if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB, - MBBSlots)) + PFS)) return true; } return false; @@ -269,7 +306,7 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { bool MIRParserImpl::initializeMachineBasicBlock( MachineFunction &MF, MachineBasicBlock &MBB, const yaml::MachineBasicBlock &YamlMBB, - const DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + const PerFunctionMIParsingState &PFS) { MBB.setAlignment(YamlMBB.Alignment); if (YamlMBB.AddressTaken) MBB.setHasAddressTaken(); @@ -278,16 +315,24 @@ bool MIRParserImpl::initializeMachineBasicBlock( // Parse the successors. 
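(For context, an assumed sketch of the per-block YAML this code consumes; exact field spellings may differ:

    - id: 0
      liveins: [ '%edi' ]
      successors: [ '%bb.1' ]

The loops below resolve the successor and live-in strings through parseMBBReference and parseNamedRegisterReference.)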
for (const auto &MBBSource : YamlMBB.Successors) { MachineBasicBlock *SuccMBB = nullptr; - if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, MBBSlots, IRSlots, + if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, PFS, IRSlots, Error)) return error(Error, MBBSource.SourceRange); // TODO: Report an error when adding the same successor more than once. MBB.addSuccessor(SuccMBB); } + // Parse the liveins. + for (const auto &LiveInSource : YamlMBB.LiveIns) { + unsigned Reg = 0; + if (parseNamedRegisterReference(Reg, SM, MF, LiveInSource.Value, PFS, + IRSlots, Error)) + return error(Error, LiveInSource.SourceRange); + MBB.addLiveIn(Reg); + } // Parse the instructions. for (const auto &MISource : YamlMBB.Instructions) { MachineInstr *MI = nullptr; - if (parseMachineInstr(MI, SM, MF, MISource.Value, MBBSlots, IRSlots, Error)) + if (parseMachineInstr(MI, SM, MF, MISource.Value, PFS, IRSlots, Error)) return error(Error, MISource.SourceRange); MBB.insert(MBB.end(), MI); } @@ -295,7 +340,9 @@ bool MIRParserImpl::initializeMachineBasicBlock( } bool MIRParserImpl::initializeRegisterInfo( - MachineRegisterInfo &RegInfo, const yaml::MachineFunction &YamlMF) { + const MachineFunction &MF, MachineRegisterInfo &RegInfo, + const yaml::MachineFunction &YamlMF, + DenseMap<unsigned, unsigned> &VirtualRegisterSlots) { assert(RegInfo.isSSA()); if (!YamlMF.IsSSA) RegInfo.leaveSSA(); @@ -303,6 +350,67 @@ bool MIRParserImpl::initializeRegisterInfo( if (!YamlMF.TracksRegLiveness) RegInfo.invalidateLiveness(); RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness); + + // Parse the virtual register information. + for (const auto &VReg : YamlMF.VirtualRegisters) { + const auto *RC = getRegClass(MF, VReg.Class.Value); + if (!RC) + return error(VReg.Class.SourceRange.Start, + Twine("use of undefined register class '") + + VReg.Class.Value + "'"); + unsigned Reg = RegInfo.createVirtualRegister(RC); + // TODO: Report an error when the same virtual register with the same ID is + // redefined. + VirtualRegisterSlots.insert(std::make_pair(VReg.ID, Reg)); + } + return false; +} + +bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, + const yaml::MachineFunction &YamlMF) { + const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; + MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); + MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken); + MFI.setHasStackMap(YamlMFI.HasStackMap); + MFI.setHasPatchPoint(YamlMFI.HasPatchPoint); + MFI.setStackSize(YamlMFI.StackSize); + MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment); + if (YamlMFI.MaxAlignment) + MFI.ensureMaxAlignment(YamlMFI.MaxAlignment); + MFI.setAdjustsStack(YamlMFI.AdjustsStack); + MFI.setHasCalls(YamlMFI.HasCalls); + MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); + MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); + MFI.setHasVAStart(YamlMFI.HasVAStart); + MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); + + // Initialize the fixed frame objects. + for (const auto &Object : YamlMF.FixedStackObjects) { + int ObjectIdx; + if (Object.Type != yaml::FixedMachineStackObject::SpillSlot) + ObjectIdx = MFI.CreateFixedObject(Object.Size, Object.Offset, + Object.IsImmutable, Object.IsAliased); + else + ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset); + MFI.setObjectAlignment(ObjectIdx, Object.Alignment); + // TODO: Store the mapping between fixed object IDs and object indices to + // parse fixed stack object references correctly. + } + + // Initialize the ordinary frame objects. 
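(An assumed sketch of the serialized frame-object lists handled above and below; exact field spellings may differ:

    fixedStack:
      - { id: 0, offset: -12, size: 4, alignment: 4 }
    stack:
      - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4 }

Variable-sized objects carry a variable-sized type and no fixed size.)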
+ for (const auto &Object : YamlMF.StackObjects) { + int ObjectIdx; + if (Object.Type == yaml::MachineStackObject::VariableSized) + ObjectIdx = + MFI.CreateVariableSizedObject(Object.Alignment, /*Alloca=*/nullptr); + else + ObjectIdx = MFI.CreateStackObject( + Object.Size, Object.Alignment, + Object.Type == yaml::MachineStackObject::SpillSlot); + MFI.setObjectOffset(ObjectIdx, Object.Offset); + // TODO: Store the mapping between object IDs and object indices to parse + // stack object references correctly. + } return false; } @@ -353,6 +461,26 @@ SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error, Error.getFixIts()); } +void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) { + if (!Names2RegClasses.empty()) + return; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) { + const auto *RC = TRI->getRegClass(I); + Names2RegClasses.insert( + std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC)); + } +} + +const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF, + StringRef Name) { + initNames2RegClasses(MF); + auto RegClassInfo = Names2RegClasses.find(Name); + if (RegClassInfo == Names2RegClasses.end()) + return nullptr; + return RegClassInfo->getValue(); +} + MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl) : Impl(std::move(Impl)) {} diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 76cbe2994c95..d5cf9244199e 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -15,10 +15,12 @@ #include "MIRPrinter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" @@ -40,9 +42,13 @@ public: void print(const MachineFunction &MF); - void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo); - void convert(const Module &M, yaml::MachineBasicBlock &YamlMBB, + void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI); + void convert(yaml::MachineFrameInfo &YamlMFI, const MachineFrameInfo &MFI); + void convert(ModuleSlotTracker &MST, yaml::MachineBasicBlock &YamlMBB, const MachineBasicBlock &MBB); + void convertStackObjects(yaml::MachineFunction &MF, + const MachineFrameInfo &MFI); private: void initRegisterMaskIds(const MachineFunction &MF); @@ -51,14 +57,14 @@ private: /// This class prints out the machine instructions using the MIR serialization /// format. 
class MIPrinter { - const Module &M; raw_ostream &OS; + ModuleSlotTracker &MST; const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds; public: - MIPrinter(const Module &M, raw_ostream &OS, + MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST, const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds) - : M(M), OS(OS), RegisterMaskIds(RegisterMaskIds) {} + : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds) {} void print(const MachineInstr &MI); void printMBBReference(const MachineBasicBlock &MBB); @@ -84,6 +90,19 @@ template <> struct BlockScalarTraits<Module> { } // end namespace yaml } // end namespace llvm +static void printReg(unsigned Reg, raw_ostream &OS, + const TargetRegisterInfo *TRI) { + // TODO: Print Stack Slots. + if (!Reg) + OS << '_'; + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); + else if (Reg < TRI->getNumRegs()) + OS << '%' << StringRef(TRI->getName(Reg)).lower(); + else + llvm_unreachable("Can't print this kind of register yet"); +} + void MIRPrinter::print(const MachineFunction &MF) { initRegisterMaskIds(MF); @@ -92,10 +111,12 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); YamlMF.HasInlineAsm = MF.hasInlineAsm(); - convert(YamlMF, MF.getRegInfo()); + convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); + convert(YamlMF.FrameInfo, *MF.getFrameInfo()); + convertStackObjects(YamlMF, *MF.getFrameInfo()); int I = 0; - const auto &M = *MF.getFunction()->getParent(); + ModuleSlotTracker MST(MF.getFunction()->getParent()); for (const auto &MBB : MF) { // TODO: Allow printing of non sequentially numbered MBBs. // This is currently needed as the basic block references get their index @@ -105,7 +126,7 @@ void MIRPrinter::print(const MachineFunction &MF) { "Can't print MBBs that aren't sequentially numbered"); (void)I; yaml::MachineBasicBlock YamlMBB; - convert(M, YamlMBB, MBB); + convert(MST, YamlMBB, MBB); YamlMF.BasicBlocks.push_back(YamlMBB); } yaml::Output Out(OS); @@ -113,37 +134,120 @@ void MIRPrinter::print(const MachineFunction &MF) { } void MIRPrinter::convert(yaml::MachineFunction &MF, - const MachineRegisterInfo &RegInfo) { + const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { MF.IsSSA = RegInfo.isSSA(); MF.TracksRegLiveness = RegInfo.tracksLiveness(); MF.TracksSubRegLiveness = RegInfo.subRegLivenessEnabled(); + + // Print the virtual register definitions. 
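(The loop that follows emits one entry per virtual register, producing a block roughly like this assumed example:

    registers:
      - { id: 0, class: gr32 }

with the register class name lower-cased to match what MIRParserImpl::getRegClass expects when parsing it back.)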
+ for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + yaml::VirtualRegisterDefinition VReg; + VReg.ID = I; + VReg.Class = + StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); + MF.VirtualRegisters.push_back(VReg); + } +} + +void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI, + const MachineFrameInfo &MFI) { + YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken(); + YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken(); + YamlMFI.HasStackMap = MFI.hasStackMap(); + YamlMFI.HasPatchPoint = MFI.hasPatchPoint(); + YamlMFI.StackSize = MFI.getStackSize(); + YamlMFI.OffsetAdjustment = MFI.getOffsetAdjustment(); + YamlMFI.MaxAlignment = MFI.getMaxAlignment(); + YamlMFI.AdjustsStack = MFI.adjustsStack(); + YamlMFI.HasCalls = MFI.hasCalls(); + YamlMFI.MaxCallFrameSize = MFI.getMaxCallFrameSize(); + YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment(); + YamlMFI.HasVAStart = MFI.hasVAStart(); + YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); } -void MIRPrinter::convert(const Module &M, yaml::MachineBasicBlock &YamlMBB, +void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, + const MachineFrameInfo &MFI) { + // Process fixed stack objects. + unsigned ID = 0; + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + if (MFI.isDeadObjectIndex(I)) + continue; + + yaml::FixedMachineStackObject YamlObject; + YamlObject.ID = ID++; + YamlObject.Type = MFI.isSpillSlotObjectIndex(I) + ? yaml::FixedMachineStackObject::SpillSlot + : yaml::FixedMachineStackObject::DefaultType; + YamlObject.Offset = MFI.getObjectOffset(I); + YamlObject.Size = MFI.getObjectSize(I); + YamlObject.Alignment = MFI.getObjectAlignment(I); + YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); + YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); + MF.FixedStackObjects.push_back(YamlObject); + // TODO: Store the mapping between fixed object IDs and object indices to + // print the fixed stack object references correctly. + } + + // Process ordinary stack objects. + ID = 0; + for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I) { + if (MFI.isDeadObjectIndex(I)) + continue; + + yaml::MachineStackObject YamlObject; + YamlObject.ID = ID++; + YamlObject.Type = MFI.isSpillSlotObjectIndex(I) + ? yaml::MachineStackObject::SpillSlot + : MFI.isVariableSizedObjectIndex(I) + ? yaml::MachineStackObject::VariableSized + : yaml::MachineStackObject::DefaultType; + YamlObject.Offset = MFI.getObjectOffset(I); + YamlObject.Size = MFI.getObjectSize(I); + YamlObject.Alignment = MFI.getObjectAlignment(I); + + MF.StackObjects.push_back(YamlObject); + // TODO: Store the mapping between object IDs and object indices to print + // the stack object references correctly. + } +} + +void MIRPrinter::convert(ModuleSlotTracker &MST, + yaml::MachineBasicBlock &YamlMBB, const MachineBasicBlock &MBB) { assert(MBB.getNumber() >= 0 && "Invalid MBB number"); YamlMBB.ID = (unsigned)MBB.getNumber(); // TODO: Serialize unnamed BB references. if (const auto *BB = MBB.getBasicBlock()) - YamlMBB.Name = BB->hasName() ? BB->getName() : "<unnamed bb>"; + YamlMBB.Name.Value = BB->hasName() ? 
BB->getName() : "<unnamed bb>"; else - YamlMBB.Name = ""; + YamlMBB.Name.Value = ""; YamlMBB.Alignment = MBB.getAlignment(); YamlMBB.AddressTaken = MBB.hasAddressTaken(); YamlMBB.IsLandingPad = MBB.isLandingPad(); for (const auto *SuccMBB : MBB.successors()) { std::string Str; raw_string_ostream StrOS(Str); - MIPrinter(M, StrOS, RegisterMaskIds).printMBBReference(*SuccMBB); + MIPrinter(StrOS, MST, RegisterMaskIds).printMBBReference(*SuccMBB); YamlMBB.Successors.push_back(StrOS.str()); } - + // Print the live in registers. + const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + for (auto I = MBB.livein_begin(), E = MBB.livein_end(); I != E; ++I) { + std::string Str; + raw_string_ostream StrOS(Str); + printReg(*I, StrOS, TRI); + YamlMBB.LiveIns.push_back(StrOS.str()); + } // Print the machine instructions. YamlMBB.Instructions.reserve(MBB.size()); std::string Str; for (const auto &MI : MBB) { raw_string_ostream StrOS(Str); - MIPrinter(M, StrOS, RegisterMaskIds).print(MI); + MIPrinter(StrOS, MST, RegisterMaskIds).print(MI); YamlMBB.Instructions.push_back(StrOS.str()); Str.clear(); } @@ -188,18 +292,6 @@ void MIPrinter::print(const MachineInstr &MI) { } } -static void printReg(unsigned Reg, raw_ostream &OS, - const TargetRegisterInfo *TRI) { - // TODO: Print Stack Slots. - // TODO: Print virtual registers. - if (!Reg) - OS << '_'; - else if (Reg < TRI->getNumRegs()) - OS << '%' << StringRef(TRI->getName(Reg)).lower(); - else - llvm_unreachable("Can't print this kind of register yet"); -} - void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) { OS << "%bb." << MBB.getNumber(); if (const auto *BB = MBB.getBasicBlock()) { @@ -211,9 +303,19 @@ void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) { void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) { switch (Op.getType()) { case MachineOperand::MO_Register: - // TODO: Print register flags. + // TODO: Print the other register flags. + if (Op.isImplicit()) + OS << (Op.isDef() ? "implicit-def " : "implicit "); + if (Op.isDead()) + OS << "dead "; + if (Op.isKill()) + OS << "killed "; + if (Op.isUndef()) + OS << "undef "; printReg(Op.getReg(), OS, TRI); - // TODO: Print sub register. + // Print the sub register. + if (Op.getSubReg() != 0) + OS << ':' << TRI->getSubRegIndexName(Op.getSubReg()); break; case MachineOperand::MO_Immediate: OS << Op.getImm(); @@ -222,10 +324,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) { printMBBReference(*Op.getMBB()); break; case MachineOperand::MO_GlobalAddress: - // FIXME: Make this faster - print as operand will create a slot tracker to - // print unnamed values for the whole module every time it's called, which - // is inefficient. - Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, &M); + Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); // TODO: Print offset and target flags. 
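(With the register-flag and subregister handling added in this switch, register operands can now round-trip in forms such as 'implicit-def dead %eflags' or '%0:sub_8bit'; assumed examples matching the printer above and the parser's keyword set.)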
break; case MachineOperand::MO_RegisterMask: { diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 467a2e4eb428..3f04bb0b532b 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -19,8 +19,8 @@ using namespace llvm; namespace llvm { -TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>); -TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>); +template class DomTreeNodeBase<MachineBasicBlock>; +template class DominatorTreeBase<MachineBasicBlock>; } char MachineDominatorTree::ID = 0; diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 800d1b5bd57d..9856e70edaef 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -74,7 +75,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, if (Fn->hasFnAttribute(Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); - ConstantPool = new (Allocator) MachineConstantPool(TM); + ConstantPool = new (Allocator) MachineConstantPool(getDataLayout()); Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. @@ -118,6 +119,10 @@ MachineFunction::~MachineFunction() { } } +const DataLayout &MachineFunction::getDataLayout() const { + return Fn->getParent()->getDataLayout(); +} + /// Get the JumpTableInfo for this function. /// If it does not already exist, allocate one. MachineJumpTableInfo *MachineFunction:: @@ -458,12 +463,12 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// normal 'L' label is returned. MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { - const DataLayout *DL = getTarget().getDataLayout(); + const DataLayout &DL = getDataLayout(); assert(JumpTableInfo && "No jump tables"); assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); - const char *Prefix = isLinkerPrivate ? DL->getLinkerPrivateGlobalPrefix() : - DL->getPrivateGlobalPrefix(); + const char *Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix() + : DL.getPrivateGlobalPrefix(); SmallString<60> Name; raw_svector_ostream(Name) << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; @@ -472,9 +477,9 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, /// Return a function-local symbol to represent the PIC base. 
MCSymbol *MachineFunction::getPICBaseSymbol() const { - const DataLayout *DL = getTarget().getDataLayout(); - return Ctx.getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+ - Twine(getFunctionNumber())+"$pb"); + const DataLayout &DL = getDataLayout(); + return Ctx.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + Twine(getFunctionNumber()) + "$pb"); } //===----------------------------------------------------------------------===// @@ -790,10 +795,6 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } -const DataLayout *MachineConstantPool::getDataLayout() const { - return TM.getDataLayout(); -} - Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); @@ -851,7 +852,7 @@ MachineConstantPool::~MachineConstantPool() { /// Test whether the given two constants can be allocated the same constant pool /// entry. static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, - const DataLayout *TD) { + const DataLayout &DL) { // Handle the trivial case quickly. if (A == B) return true; @@ -865,8 +866,8 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, return false; // For now, only support constants with the same size. - uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); - if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128) + uint64_t StoreSize = DL.getTypeStoreSize(A->getType()); + if (StoreSize != DL.getTypeStoreSize(B->getType()) || StoreSize > 128) return false; Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); @@ -877,16 +878,16 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, // DataLayout. if (isa<PointerType>(A->getType())) A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant *>(A), *TD); + const_cast<Constant *>(A), DL); else if (A->getType() != IntTy) A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant *>(A), *TD); + const_cast<Constant *>(A), DL); if (isa<PointerType>(B->getType())) B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, - const_cast<Constant *>(B), *TD); + const_cast<Constant *>(B), DL); else if (B->getType() != IntTy) B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, - const_cast<Constant *>(B), *TD); + const_cast<Constant *>(B), DL); return A == B; } @@ -903,8 +904,7 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, // FIXME, this could be made much more efficient for large constant pools. 
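(Here DL is the DataLayout the constant pool was constructed with: MachineFunction now builds its MachineConstantPool from MachineFunction::getDataLayout(), which forwards to the parent Module, instead of reaching through the TargetMachine, and CanShareConstantPoolEntry takes the layout by reference.)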
for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (!Constants[i].isMachineConstantPoolEntry() && - CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, - getDataLayout())) { + CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, DL)) { if ((unsigned)Constants[i].getAlignment() < Alignment) Constants[i].Alignment = Alignment; return i; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 42d0603ab96b..6a206249d834 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -320,7 +320,10 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, const Function *Personality) { LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); LP.Personality = Personality; + addPersonality(Personality); +} +void MachineModuleInfo::addPersonality(const Function *Personality) { for (unsigned i = 0; i < Personalities.size(); ++i) if (Personalities[i] == Personality) return; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 278a8f24d63e..5984af87a184 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" #include "llvm/Support/raw_os_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -28,7 +29,6 @@ MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF) TracksSubRegLiveness(false) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); // Create the physreg use/def lists. @@ -441,3 +441,49 @@ void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const { UseMI->getOperand(0).setReg(0U); } } + +static const Function *getCalledFunction(const MachineInstr &MI) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isGlobal()) + continue; + const Function *Func = dyn_cast<Function>(MO.getGlobal()); + if (Func != nullptr) + return Func; + } + return nullptr; +} + +static bool isNoReturnDef(const MachineOperand &MO) { + // Anything which is not a noreturn function is a real def. + const MachineInstr &MI = *MO.getParent(); + if (!MI.isCall()) + return false; + const MachineBasicBlock &MBB = *MI.getParent(); + if (!MBB.succ_empty()) + return false; + const MachineFunction &MF = *MBB.getParent(); + // We need to keep correct unwind information even if the function will + // not return, since the runtime may need it. 
+ if (MF.getFunction()->hasFnAttribute(Attribute::UWTable)) + return false; + const Function *Called = getCalledFunction(MI); + if (Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) + || !Called->hasFnAttribute(Attribute::NoUnwind)) + return false; + + return true; +} + +bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const { + if (UsedPhysRegMask.test(PhysReg)) + return true; + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) { + for (const MachineOperand &MO : make_range(def_begin(*AI), def_end())) { + if (isNoReturnDef(MO)) + continue; + return true; + } + } + return false; +} diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index f9adba0b35c4..9404c687d410 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -509,18 +509,17 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { << " height.\n"); // Find any MBB predecessors that have MBB as their preferred successor. // They are the only ones that need to be invalidated. - for (MachineBasicBlock::const_pred_iterator - I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { - TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()]; if (!TBI.hasValidHeight()) continue; if (TBI.Succ == MBB) { TBI.invalidateHeight(); - WorkList.push_back(*I); + WorkList.push_back(Pred); continue; } // Verify that TBI.Succ is actually a *I successor. - assert((!TBI.Succ || (*I)->isSuccessor(TBI.Succ)) && "CFG changed"); + assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed"); } } while (!WorkList.empty()); } @@ -535,18 +534,17 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { << " depth.\n"); // Find any MBB successors that have MBB as their preferred predecessor. // They are the only ones that need to be invalidated. - for (MachineBasicBlock::const_succ_iterator - I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - TraceBlockInfo &TBI = BlockInfo[(*I)->getNumber()]; + for (const MachineBasicBlock *Succ : MBB->successors()) { + TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()]; if (!TBI.hasValidDepth()) continue; if (TBI.Pred == MBB) { TBI.invalidateDepth(); - WorkList.push_back(*I); + WorkList.push_back(Succ); continue; } // Verify that TBI.Pred is actually a *I predecessor. - assert((!TBI.Pred || (*I)->isPredecessor(TBI.Pred)) && "CFG changed"); + assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed"); } } while (!WorkList.empty()); } @@ -998,8 +996,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) { // MBB is the highest precomputed block in the trace. if (MBB) { TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; - for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { - LiveInReg LI = TBI.LiveIns[i]; + for (LiveInReg &LI : TBI.LiveIns) { if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { // For virtual registers, the def latency is included. unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; @@ -1131,11 +1128,16 @@ computeInstrHeights(const MachineBasicBlock *MBB) { MachineTraceMetrics::Trace MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { - // FIXME: Check cache tags, recompute as needed. 
- computeTrace(MBB); - computeInstrDepths(MBB); - computeInstrHeights(MBB); - return Trace(*this, BlockInfo[MBB->getNumber()]); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + + if (!TBI.hasValidDepth() || !TBI.hasValidHeight()) + computeTrace(MBB); + if (!TBI.HasValidInstrDepths) + computeInstrDepths(MBB); + if (!TBI.HasValidInstrHeights) + computeInstrHeights(MBB); + + return Trace(*this, TBI); } unsigned @@ -1204,8 +1206,7 @@ unsigned MachineTraceMetrics::Trace::getResourceLength( unsigned ResourceIdx) ->unsigned { unsigned Cycles = 0; - for (unsigned I = 0; I != Instrs.size(); ++I) { - const MCSchedClassDesc *SC = Instrs[I]; + for (const MCSchedClassDesc *SC : Instrs) { if (!SC->isValid()) continue; for (TargetSchedModel::ProcResIter @@ -1223,8 +1224,8 @@ unsigned MachineTraceMetrics::Trace::getResourceLength( for (unsigned K = 0; K != PRDepths.size(); ++K) { unsigned PRCycles = PRDepths[K] + PRHeights[K]; - for (unsigned I = 0; I != Extrablocks.size(); ++I) - PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + for (const MachineBasicBlock *MBB : Extrablocks) + PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K]; PRCycles += extraCycles(ExtraInstrs, K); PRCycles -= extraCycles(RemoveInstrs, K); PRMax = std::max(PRMax, PRCycles); @@ -1235,8 +1236,8 @@ unsigned MachineTraceMetrics::Trace::getResourceLength( // Instrs: #instructions in current trace outside current block. unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; // Add instruction count from the extra blocks. - for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) - Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; + for (const MachineBasicBlock *MBB : Extrablocks) + Instrs += TE.MTM.getResources(MBB)->InstrCount; Instrs += ExtraInstrs.size(); Instrs -= RemoveInstrs.size(); if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 210a7a1649cd..024d166a4987 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -214,10 +214,10 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), - Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), - Impl(nullptr), Initialized(false), DisableVerify(false), - EnableTailMerge(true), EnableShrinkWrap(false) { + : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr), + StopAfter(nullptr), Started(true), Stopped(false), + AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), + DisableVerify(false), EnableTailMerge(true), EnableShrinkWrap(false) { Impl = new PassConfigImpl(); @@ -288,6 +288,8 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { // and shouldn't reference it. AnalysisID PassID = P->getPassID(); + if (StartBefore == PassID) + Started = true; if (Started && !Stopped) { std::string Banner; // Construct banner message before PM->add() as that may delete the pass. @@ -422,7 +424,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // removed from the parent invoke(s). This could happen when a landing // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. 
- addPass(createSjLjEHPreparePass(TM)); + addPass(createSjLjEHPreparePass()); // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 76583f0de888..b2fdee6c8e4c 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -82,7 +82,8 @@ private: void calculateSets(MachineFunction &Fn); void calculateCallsInformation(MachineFunction &Fn); - void calculateCalleeSavedRegisters(MachineFunction &Fn); + void assignCalleeSavedSpillSlots(MachineFunction &Fn, + const BitVector &SavedRegs); void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); @@ -92,7 +93,7 @@ private: void insertPrologEpilogCode(MachineFunction &Fn); // Convenience for recognizing return blocks. - bool isReturnBlock(MachineBasicBlock *MBB); + bool isReturnBlock(const MachineBasicBlock *MBB) const; }; } // namespace @@ -127,7 +128,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -bool PEI::isReturnBlock(MachineBasicBlock* MBB) { +bool PEI::isReturnBlock(const MachineBasicBlock* MBB) const { return (MBB && !MBB->empty() && MBB->back().isReturn()); } @@ -143,7 +144,12 @@ void PEI::calculateSets(MachineFunction &Fn) { if (MFI->getSavePoint()) { SaveBlock = MFI->getSavePoint(); assert(MFI->getRestorePoint() && "Both restore and save must be set"); - RestoreBlocks.push_back(MFI->getRestorePoint()); + MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); + // If RestoreBlock does not have any successor and is not a return block + // then the end point is unreachable and we do not need to insert any + // epilogue. + if (!RestoreBlock->succ_empty() || isReturnBlock(RestoreBlock)) + RestoreBlocks.push_back(RestoreBlock); return; } @@ -178,13 +184,12 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // instructions. calculateCallsInformation(Fn); - // Allow the target machine to make some adjustments to the function - // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. - TFI->processFunctionBeforeCalleeSavedScan(Fn, RS); + // Determine which of the registers in the callee save list should be saved. + BitVector SavedRegs; + TFI->determineCalleeSaves(Fn, SavedRegs, RS); - // Scan the function for modified callee saved registers and insert spill code - // for any callee saved registers that are modified. - calculateCalleeSavedRegisters(Fn); + // Insert spill code for any callee saved registers that are modified. + assignCalleeSavedSpillSlots(Fn, SavedRegs); // Determine placement of CSR spill/restore code: // place all spills in the entry block, all restores in return blocks. @@ -290,39 +295,27 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { } } - -/// calculateCalleeSavedRegisters - Scan the function for modified callee saved -/// registers. -void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { - const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); - const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); - MachineFrameInfo *MFI = F.getFrameInfo(); - - // Get the callee saved register list... - const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F); - +void PEI::assignCalleeSavedSpillSlots(MachineFunction &F, + const BitVector &SavedRegs) { // These are used to keep track the callee-save area. Initialize them. 
MinCSFrameIndex = INT_MAX; MaxCSFrameIndex = 0; - // Early exit for targets which have no callee saved registers. - if (!CSRegs || CSRegs[0] == 0) + if (SavedRegs.empty()) return; - // In Naked functions we aren't going to save any registers. - if (F.getFunction()->hasFnAttribute(Attribute::Naked)) - return; + const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F); std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - // Functions which call __builtin_unwind_init get all their registers saved. - if (F.getRegInfo().isPhysRegUsed(Reg) || F.getMMI().callsUnwindInit()) { - // If the reg is modified, save it! + if (SavedRegs.test(Reg)) CSI.push_back(CalleeSavedInfo(Reg)); - } } + const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); + MachineFrameInfo *MFI = F.getFrameInfo(); if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { // If target doesn't implement this, use generic code. @@ -1033,12 +1026,8 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); - MachineRegisterInfo &MRI = Fn.getRegInfo(); Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); - // Make sure MRI now accounts this register as used. - MRI.setPhysRegUsed(ScratchReg); - // Because this instruction was processed by the RS before this // register was allocated, make sure that the RS now records the // register as being used. diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index fd3d4d78968b..660bb4f0e34d 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -986,10 +986,6 @@ void RAFast::AllocateBasicBlock() { } } - for (UsedInInstrSet::iterator - I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setRegUnitUsed(*I); - // Track registers defined by instruction - early clobbers and tied uses at // this point. UsedInInstr.clear(); @@ -1050,10 +1046,6 @@ void RAFast::AllocateBasicBlock() { killVirtReg(VirtDead[i]); VirtDead.clear(); - for (UsedInInstrSet::iterator - I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setRegUnitUsed(*I); - if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); Coalesced.push_back(MI); @@ -1103,12 +1095,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { AllocateBasicBlock(); } - // Add the clobber lists for all the instructions we skipped earlier. - for (const MCInstrDesc *Desc : SkippedInstrs) - if (const uint16_t *Defs = Desc->getImplicitDefs()) - while (*Defs) - MRI->setPhysRegUsed(*Defs++); - // All machine operands and other references to virtual registers have been // replaced. Remove the virtual registers. 
MRI->clearVirtRegs(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 26f42c93323a..7ebcf7f54856 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -400,6 +400,8 @@ private: typedef SmallVector<HintInfo, 4> HintsInfo; BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); void collectHintInfo(unsigned, HintsInfo &); + + bool isUnusedCalleeSavedReg(unsigned PhysReg) const; }; } // end anonymous namespace @@ -816,6 +818,16 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, } } +/// Returns true if the given \p PhysReg is a callee saved register and has not +/// been used for allocation yet. +bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const { + unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg); + if (CSR == 0) + return false; + + return !Matrix->isPhysRegUsed(PhysReg); +} + /// tryEvict - Try to evict all interferences for a physreg. /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. @@ -861,13 +873,12 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, continue; // The first use of a callee-saved register in a function has cost 1. // Don't start using a CSR when the CostPerUseLimit is low. - if (CostPerUseLimit == 1) - if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) - if (!MRI->isPhysRegUsed(CSR)) { - DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " - << PrintReg(CSR, TRI) << '\n'); - continue; - } + if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " + << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) + << '\n'); + continue; + } if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) continue; @@ -1348,9 +1359,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, unsigned BestCand = NoCand; Order.rewind(); while (unsigned PhysReg = Order.next()) { - if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) - if (IgnoreCSR && !MRI->isPhysRegUsed(CSR)) - continue; + if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg)) + continue; // Discard bad candidates before we run out of interference cache cursors. // This will only affect register classes with a lot of registers (>32). @@ -2134,7 +2144,8 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, unsigned ItVirtReg = (*It)->reg; if (VRM->hasPhys(ItVirtReg)) Matrix->unassign(**It); - Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]); + unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg]; + Matrix->assign(**It, ItPhysReg); } } @@ -2441,16 +2452,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { - // We check other options if we are using a CSR for the first time. - bool CSRFirstUse = false; - if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg)) - if (!MRI->isPhysRegUsed(CSR)) - CSRFirstUse = true; - // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. 
- if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) { + if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && + NewVRegs.empty()) { unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, CostPerUseLimit, NewVRegs); if (CSRReg || !NewVRegs.empty()) diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 450a3051c6ff..c3786e552a13 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -77,6 +77,16 @@ void RegPressureTracker::dump() const { P.dump(TRI); } +void PressureDiff::dump(const TargetRegisterInfo &TRI) const { + for (const PressureChange &Change : *this) { + if (!Change.isValid() || Change.getUnitInc() == 0) + continue; + dbgs() << " " << TRI.getRegPressureSetName(Change.getPSet()) + << " " << Change.getUnitInc(); + } + dbgs() << '\n'; +} + /// Increase the current pressure as impacted by these registers and bump /// the high water mark if needed. void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) { @@ -787,6 +797,8 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta Delta2; getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit); if (Delta != Delta2) { + dbgs() << "PDiff: "; + PDiff->dump(*TRI); dbgs() << "DELTA: " << *MI; if (Delta.Excess.isValid()) dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet()) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6056d93ddc7a..52d620b1d540 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -443,8 +443,9 @@ namespace { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) - : TLI.getPointerTy(); + auto &DL = DAG.getDataLayout(); + return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy) + : TLI.getPointerTy(DL); } /// This method returns true if we are running before type legalization or @@ -456,7 +457,7 @@ namespace { /// Convenience wrapper around TargetLowering::getSetCCResultType EVT getSetCCResultType(EVT VT) const { - return TLI.getSetCCResultType(*DAG.getContext(), VT); + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } }; } @@ -3111,7 +3112,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // For big endian targets, we need to add an offset to the pointer // to load the correct bytes. For little endian systems, we merely // need to read fewer bytes from the same pointer. - if (TLI.isBigEndian()) { + if (DAG.getDataLayout().isBigEndian()) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; @@ -6675,7 +6676,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. 
- if (TLI.isBigEndian()) { + if (DAG.getDataLayout().isBigEndian()) { unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; @@ -6873,7 +6874,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); // noop truncate if (N0.getValueType() == N->getValueType(0)) @@ -6926,7 +6927,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - EVT IndexTy = TLI.getVectorIdxTy(); + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), @@ -7093,8 +7094,8 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { !LD2->isVolatile() && DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { unsigned Align = LD1->getAlignment(); - unsigned NewAlign = TLI.getDataLayout()-> - getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) @@ -7150,13 +7151,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile() && // Do not remove the cast if the types differ in endian layout. - TLI.hasBigEndianPartOrdering(N0.getValueType()) == - TLI.hasBigEndianPartOrdering(VT) && + TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) == + TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - unsigned Align = TLI.getDataLayout()-> - getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); + unsigned Align = DAG.getDataLayout().getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { @@ -7368,7 +7369,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = BV->getNumOperands(); i != e; i += NumInputsPerOutput) { - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); APInt NewBits = APInt(DstBitSize, 0); bool EltIsUndef = true; for (unsigned j = 0; j != NumInputsPerOutput; ++j) { @@ -7415,7 +7416,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } // For big endian targets, swap the order of the pieces of each element. - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } @@ -8373,6 +8374,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (TLI.combineRepeatedFPDivisors(Users.size())) { SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + // FIXME: This optimization requires some level of fast-math, so the + // created reciprocal node should at least have the 'allowReciprocal' + // fast-math-flag set. 
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); // Dividend / Divisor -> Dividend * Reciprocal @@ -8381,10 +8385,14 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (Dividend != FPOne) { SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, Reciprocal); - DAG.ReplaceAllUsesWith(U, NewNode.getNode()); + CombineTo(U, NewNode); + } else if (U != Reciprocal.getNode()) { + // In the absence of fast-math-flags, this user node is always the + // same node as Reciprocal, but with FMF they may be different nodes. + CombineTo(U, Reciprocal); } } - return SDValue(); + return SDValue(N, 0); // N was replaced. } } @@ -8406,30 +8414,29 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (DAG.getTarget().Options.UnsafeFPMath && - !TLI.isFsqrtCheap()) { - // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { - EVT VT = RV.getValueType(); - SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); - AddToWorklist(RV.getNode()); + if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) + return SDValue(); - // Unfortunately, RV is now NaN if the input was exactly 0. - // Select out this case and force the answer to 0. - SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - SDValue ZeroCmp = - DAG.getSetCC(DL, TLI.getSetCCResultType(*DAG.getContext(), VT), - N->getOperand(0), Zero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - AddToWorklist(RV.getNode()); + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) + SDValue RV = BuildRsqrtEstimate(N->getOperand(0)); + if (!RV) + return SDValue(); + + EVT VT = RV.getValueType(); + SDLoc DL(N); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + AddToWorklist(RV.getNode()); - RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, - DL, VT, ZeroCmp, Zero, RV); - return RV; - } - } - return SDValue(); + // Unfortunately, RV is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + AddToWorklist(RV.getNode()); + + return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, + ZeroCmp, Zero, RV); } SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { @@ -9144,7 +9151,8 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, } else return false; - return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()), AS); + return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, + VT.getTypeForEVT(*DAG.getContext()), AS); } /// Try turning a load/store into a pre-indexed load/store when the base @@ -9869,8 +9877,7 @@ struct LoadedSlice { /// \pre DAG != nullptr. uint64_t getOffsetFromBase() const { assert(DAG && "Missing context."); - bool IsBigEndian = - DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian(); + bool IsBigEndian = DAG->getDataLayout().isBigEndian(); assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); uint64_t Offset = Shift / 8; unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; @@ -9953,7 +9960,7 @@ struct LoadedSlice { // Check if it will be merged with the load. // 1. Check the alignment constraint. 
- unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment( + unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( ResVT.getTypeForEVT(*DAG->getContext())); if (RequiredAlignment > getAlignment()) @@ -10321,7 +10328,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, unsigned StOffset; unsigned NewAlign = St->getAlignment(); - if (DAG.getTargetLoweringInfo().isLittleEndian()) + if (DAG.getDataLayout().isLittleEndian()) StOffset = ByteShift; else StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; @@ -10434,12 +10441,12 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { uint64_t PtrOff = ShAmt / 8; // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) + if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), @@ -10503,7 +10510,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { unsigned LDAlign = LD->getAlignment(); unsigned STAlign = ST->getAlignment(); Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); + unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); @@ -10685,7 +10692,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // Construct a single integer constant which is made of the smaller // constant inputs. - bool IsLE = TLI.isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); for (unsigned i = 0; i < NumElem ; ++i) { unsigned Idx = IsLE ? 
(NumElem - 1 - i) : i; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); @@ -10743,7 +10750,7 @@ static bool allowableAlignment(const SelectionDAG &DAG, return true; Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty); return (Align >= ABIAlignment); } @@ -11205,8 +11212,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isUnindexed()) { unsigned OrigAlign = ST->getAlignment(); EVT SVT = Value.getOperand(0).getValueType(); - unsigned Align = TLI.getDataLayout()-> - getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); + unsigned Align = DAG.getDataLayout().getABITypeAlignment( + SVT.getTypeForEVT(*DAG.getContext())); if (Align <= OrigAlign && ((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) @@ -11265,7 +11272,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); - if (TLI.isBigEndian()) std::swap(Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); @@ -11514,7 +11522,7 @@ SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); unsigned Align = OriginalLoad->getAlignment(); - unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( VecEltVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) @@ -11648,7 +11656,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // scalar_to_vector here as well. if (!LegalOperations) { - EVT IndexTy = TLI.getVectorIdxTy(); + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); } @@ -11825,7 +11833,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!ValidTypes) return SDValue(); - bool isLE = TLI.isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): @@ -12079,10 +12087,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Try to replace VecIn1 with two extract_subvectors // No need to update the masks, they should still be correct. 
- VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(VT.getVectorNumElements(), dl, TLI.getVectorIdxTy())); - VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + VecIn2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + VecIn1 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } else return SDValue(); } @@ -13354,12 +13365,13 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, const_cast<ConstantFP*>(TV->getConstantFPValue()) }; Type *FPTy = Elts[0]->getType(); - const DataLayout &TD = *TLI.getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); // Create a ConstantArray of the two constants. Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); - SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), - TD.getPrefTypeAlignment(FPTy)); + SDValue CPIdx = + DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), + TD.getPrefTypeAlignment(FPTy)); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); // Get the offsets to the 0 and 1 element of the array so that we can @@ -13832,6 +13844,15 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are both volatile then they cannot be reordered. if (Op0->isVolatile() && Op1->isVolatile()) return true; + // If one operation reads from invariant memory, and the other may store, they + // cannot alias. These should really be checking the equivalent of mayWrite, + // but it only matters for memory nodes other than load /store. + if (Op0->isInvariant() && Op1->writeMem()) + return false; + + if (Op1->isInvariant() && Op0->writeMem()) + return false; + // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 5452b1721bb4..2b9ba2c1b534 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -166,7 +166,7 @@ bool FastISel::hasTrivialKill(const Value *V) { } unsigned FastISel::getRegForValue(const Value *V) { - EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true); + EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) return 0; @@ -228,7 +228,7 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) { if (!Reg) { // Try to emit the constant by using an integer constant with a cast. const APFloat &Flt = CF->getValueAPF(); - EVT IntVT = TLI.getPointerTy(); + EVT IntVT = TLI.getPointerTy(DL); uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); @@ -321,7 +321,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { bool IdxNIsKill = hasTrivialKill(Idx); // If the index is smaller or larger than intptr_t, truncate or extend it. - MVT PtrVT = TLI.getPointerTy(); + MVT PtrVT = TLI.getPointerTy(DL); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN, @@ -493,7 +493,7 @@ bool FastISel::selectGetElementPtr(const User *I) { // FIXME: What's a good SWAG number for MaxOffs? 
uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); - MVT VT = TLI.getPointerTy(); + MVT VT = TLI.getPointerTy(DL); for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1, E = I->op_end(); OI != E; ++OI) { @@ -908,10 +908,10 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // Handle the incoming return values from the call. CLI.clearIns(); SmallVector<EVT, 4> RetTys; - ComputeValueVTs(TLI, CLI.RetTy, RetTys); + ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI); + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL); bool CanLowerReturn = TLI.CanLowerReturn( CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext()); @@ -976,7 +976,7 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // not there, but there are cases it cannot get right. unsigned FrameAlign = Arg.Alignment; if (!FrameAlign) - FrameAlign = TLI.getByValTypeAlignment(ElementTy); + FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); Flags.setByValSize(FrameSize); Flags.setByValAlign(FrameAlign); } @@ -1245,8 +1245,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { } bool FastISel::selectCast(const User *I, unsigned Opcode) { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || !DstVT.isSimple()) @@ -1288,8 +1288,8 @@ bool FastISel::selectBitCast(const User *I) { } // Bitcasts of other values become reg-reg copies or BITCAST operators. - EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstEVT = TLI.getValueType(I->getType()); + EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstEVT = TLI.getValueType(DL, I->getType()); if (SrcEVT == MVT::Other || DstEVT == MVT::Other || !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT)) // Unhandled type. Halt "fast" selection and bail. @@ -1413,7 +1413,7 @@ bool FastISel::selectFNeg(const User *I) { bool OpRegIsKill = hasTrivialKill(I); // If the target has ISD::FNEG, use it. - EVT VT = TLI.getValueType(I->getType()); + EVT VT = TLI.getValueType(DL, I->getType()); unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, OpReg, OpRegIsKill); if (ResultReg) { @@ -1456,7 +1456,7 @@ bool FastISel::selectExtractValue(const User *U) { // Make sure we only try to handle extracts with a legal result. But also // allow i1 because it's easy. - EVT RealVT = TLI.getValueType(EVI->getType(), /*AllowUnknown=*/true); + EVT RealVT = TLI.getValueType(DL, EVI->getType(), /*AllowUnknown=*/true); if (!RealVT.isSimple()) return false; MVT VT = RealVT.getSimpleVT(); @@ -1480,7 +1480,7 @@ bool FastISel::selectExtractValue(const User *U) { unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices()); SmallVector<EVT, 4> AggValueVTs; - ComputeValueVTs(TLI, AggTy, AggValueVTs); + ComputeValueVTs(TLI, DL, AggTy, AggValueVTs); for (unsigned i = 0; i < VTIndex; i++) ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]); @@ -1582,8 +1582,8 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { case Instruction::IntToPtr: // Deliberate fall-through. 
case Instruction::PtrToInt: { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); if (DstVT.bitsGT(SrcVT)) return selectCast(I, ISD::ZERO_EXTEND); if (DstVT.bitsLT(SrcVT)) @@ -1612,7 +1612,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, bool SkipTargetIndependentISel) : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), - TM(FuncInfo.MF->getTarget()), DL(*TM.getDataLayout()), + TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()), TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), @@ -2037,7 +2037,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. - EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); + EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Handle integer promotions, though, because they're common and easy. if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) { diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index ecaa2c972719..cc306cbf5ae4 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -90,7 +90,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI); + GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI, + mf.getDataLayout()); CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, Fn->isVarArg(), Outs, Fn->getContext()); @@ -106,9 +107,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (AI->isStaticAlloca()) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. 
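For illustration only (not part of this commit): the FunctionLoweringInfo hunk above sizes a static alloca's frame object by multiplying the DataLayout alloc size of the allocated type by the alloca's array count, and aligns it to the larger of the preferred type alignment and the explicit alignment. A minimal standalone C++ sketch of that arithmetic, with hypothetical names:

#include <algorithm>
#include <cstdint>

// Hypothetical illustration of the frame-object sizing above: TySize is the
// DataLayout alloc size of the allocated type, Count comes from the alloca's
// array-size operand, and the alignment is the max of the preferred type
// alignment and the alloca's own alignment.
struct FrameObject { uint64_t Size; unsigned Align; };

FrameObject sizeStaticAlloca(uint64_t TySize, uint64_t Count,
                             unsigned PrefAlign, unsigned ExplicitAlign) {
  FrameObject FO;
  FO.Align = std::max(PrefAlign, ExplicitAlign); // preferred vs. requested
  FO.Size = TySize * Count;                      // total allocated size
  return FO;
}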
@@ -118,10 +119,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); } else { - unsigned Align = std::max( - (unsigned)TLI->getDataLayout()->getPrefTypeAlignment( - AI->getAllocatedType()), - AI->getAlignment()); + unsigned Align = + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment( + AI->getAllocatedType()), + AI->getAlignment()); unsigned StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) @@ -138,7 +139,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = - TLI->ParseConstraints(TRI, CS); + TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS); for (size_t I = 0, E = Ops.size(); I != E; ++I) { TargetLowering::AsmOperandInfo &Op = Ops[I]; if (Op.Type == InlineAsm::isClobber) { @@ -148,7 +149,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode, Op.ConstraintVT); if (PhysReg.first == SP) - MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); + MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); } } } @@ -236,7 +237,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, assert(PHIReg && "PHI node does not have an assigned virtual register!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, PN->getType(), ValueVTs); + ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); @@ -366,7 +367,7 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, Ty, ValueVTs); + ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { @@ -413,7 +414,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { return; SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, Ty, ValueVTs); + ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); assert(ValueVTs.size() == 1 && "PHIs with non-vector integer types should have a single VT."); EVT IntVT = ValueVTs[0]; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 42595cb010c2..5ec10308dc28 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -406,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = MF->getTarget().getDataLayout()->getPrefTypeAlignment(Type); + Align = MF->getDataLayout().getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! 
- Align = MF->getTarget().getDataLayout()->getTypeAllocSize(Type); + Align = MF->getDataLayout().getTypeAllocSize(Type); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index c0d7871bf08b..21ab07234c81 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -65,7 +65,7 @@ class SelectionDAGLegalize { SmallSetVector<SDNode *, 16> *UpdatedNodes; EVT getSetCCResultType(EVT VT) const { - return TLI.getSetCCResultType(*DAG.getContext(), VT); + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } // Libcall insertion helpers. @@ -269,7 +269,8 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { } } - SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); + SDValue CPIdx = + DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); if (Extend) { SDValue Result = @@ -331,7 +332,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, MachinePointerInfo(), StoredVT, false, false, 0); - SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy(AS)); + SDValue Increment = DAG.getConstant( + RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout(), AS)); SmallVector<SDValue, 8> Stores; unsigned Offset = 0; @@ -385,24 +387,27 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, int IncrementSize = NumBits / 8; // Divide the stored value in two parts. - SDValue ShiftAmount = DAG.getConstant(NumBits, dl, - TLI.getShiftAmountTy(Val.getValueType())); + SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Val.getValueType(), + DAG.getDataLayout())); SDValue Lo = Val; SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); // Store the two parts SDValue Store1, Store2; - Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, - ST->getPointerInfo(), NewStoredVT, + Store1 = DAG.getTruncStore(Chain, dl, + DAG.getDataLayout().isLittleEndian() ? Lo : Hi, + Ptr, ST->getPointerInfo(), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, TLI.getPointerTy(AS))); + DAG.getConstant(IncrementSize, dl, + TLI.getPointerTy(DAG.getDataLayout(), AS))); Alignment = MinAlign(Alignment, IncrementSize); - Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), - NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), - Alignment, ST->getAAInfo()); + Store2 = DAG.getTruncStore( + Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, + ST->isVolatile(), ST->isNonTemporal(), Alignment, ST->getAAInfo()); SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -448,7 +453,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Make sure the stack slot is also aligned for the register type. 
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - SDValue Increment = DAG.getConstant(RegBytes, dl, TLI.getPointerTy()); + SDValue Increment = + DAG.getConstant(RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout())); SmallVector<SDValue, 8> Stores; SDValue StackPtr = StackBase; unsigned Offset = 0; @@ -522,7 +528,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load the value in two parts SDValue Lo, Hi; - if (TLI.isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), Alignment, @@ -549,8 +555,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // aggregate the two parts - SDValue ShiftAmount = DAG.getConstant(NumBits, dl, - TLI.getShiftAmountTy(Hi.getValueType())); + SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Hi.getValueType(), + DAG.getDataLayout())); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); @@ -581,7 +588,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, EVT VT = Tmp1.getValueType(); EVT EltVT = VT.getVectorElementType(); EVT IdxVT = Tmp3.getValueType(); - EVT PtrVT = TLI.getPointerTy(); + EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.CreateStackTemporary(VT); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); @@ -677,7 +684,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt(); SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32); SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32); - if (TLI.isBigEndian()) std::swap(Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment, AAInfo); @@ -724,7 +732,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Align = ST->getAlignment(); if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); if (Align < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } @@ -756,6 +764,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); + auto &DL = DAG.getDataLayout(); if (StWidth != StVT.getStoreSizeInBits()) { // Promote to a byte-sized store with upper bits zero if not @@ -782,7 +791,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Lo, Hi; unsigned IncrementSize; - if (TLI.isLittleEndian()) { + if (DL.isLittleEndian()) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. 
Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), @@ -795,9 +804,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(RoundWidth, dl, - TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -806,9 +816,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Big endian - avoid unaligned stores. // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X // Store the top RoundWidth bits. - Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(ExtraWidth, dl, - TLI.getShiftAmountTy(Value.getValueType()))); + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, AAInfo); @@ -838,7 +849,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DL.getABITypeAlignment(Ty); if (Align < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } @@ -890,8 +901,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // expand it. if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); if (Align < ABIAlignment){ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); } @@ -995,8 +1005,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); SDValue Lo, Hi, Ch; unsigned IncrementSize; + auto &DL = DAG.getDataLayout(); - if (TLI.isLittleEndian()) { + if (DL.isLittleEndian()) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), @@ -1020,9 +1031,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Hi.getValue(1)); // Move the top bits to the right place. - Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, dl, - TLI.getShiftAmountTy(Hi.getValueType()))); + Hi = DAG.getNode( + ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Hi.getValueType(), DL))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1051,9 +1063,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Hi.getValue(1)); // Move the top bits to the right place. 
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, dl, - TLI.getShiftAmountTy(Hi.getValueType()))); + Hi = DAG.getNode( + ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Hi.getValueType(), DL))); // Join the hi and lo parts. Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1086,7 +1099,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned Align = LD->getAlignment(); if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); if (Align < ABIAlignment){ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); } @@ -1439,7 +1452,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); - Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); SDValue NewLoad; @@ -1491,7 +1504,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); - Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy()); + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); @@ -1569,15 +1582,16 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { // Convert to an integer with the same sign bit. SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); } else { + auto &DL = DAG.getDataLayout(); // Store the float to memory, then load the sign part out as an integer. - MVT LoadTy = TLI.getPointerTy(); + MVT LoadTy = TLI.getPointerTy(DL); // First create a temporary that is aligned for both the load and store. SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); // Then store the float to it. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), false, false, 0); - if (TLI.isBigEndian()) { + if (DL.isBigEndian()) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), @@ -1599,9 +1613,10 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { (FloatVT.getSizeInBits() - 8 * ByteOffset); assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); if (BitShift) - SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift, dl, - TLI.getShiftAmountTy(SignBit.getValueType()))); + SignBit = DAG.getNode( + ISD::SHL, dl, LoadTy, SignBit, + DAG.getConstant(BitShift, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), DL))); } } // Now get the sign bit proper, by seeing whether the value is negative. @@ -1777,9 +1792,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT DestVT, SDLoc dl) { // Create the stack frame object. - unsigned SrcAlign = - TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType(). 
- getTypeForEVT(*DAG.getContext())); + unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( + SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); @@ -1790,7 +1804,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, unsigned SlotSize = SlotVT.getSizeInBits(); unsigned DestSize = DestVT.getSizeInBits(); Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); - unsigned DestAlign = TLI.getDataLayout()->getPrefTypeAlignment(DestType); + unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType); // Emit a store to the stack slot. Use a truncstore if the input value is // later than DestVT. @@ -1994,7 +2008,8 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { } } Constant *CP = ConstantVector::get(CV); - SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy()); + SDValue CPIdx = + DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -2058,7 +2073,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); @@ -2106,7 +2121,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); @@ -2140,7 +2155,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); @@ -2277,7 +2292,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, Args.push_back(Entry); SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); @@ -2389,7 +2404,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, Args.push_back(Entry); SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); @@ -2426,7 +2441,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Hi = StackSlot; SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), StackSlot, WordOff); - if (TLI.isLittleEndian()) + if (DAG.getDataLayout().isLittleEndian()) std::swap(Hi, Lo); // if signed map to unsigned space @@ -2509,8 +2524,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (!isSigned) { SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); - SDValue ShiftConst = - DAG.getConstant(1, dl, TLI.getShiftAmountTy(Op0.getValueType())); + SDValue ShiftConst = DAG.getConstant( + 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout())); SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); SDValue AndConst = DAG.getConstant(1, dl, MVT::i64); SDValue And = 
DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); @@ -2545,7 +2560,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, MVT::i64), ISD::SETUGE); SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); - EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); + EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, DAG.getConstant(32, dl, SHVT)); @@ -2584,11 +2599,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) } - if (TLI.isLittleEndian()) FF <<= 32; + if (DAG.getDataLayout().isLittleEndian()) + FF <<= 32; Constant *FudgeFactor = ConstantInt::get( Type::getInt64Ty(*DAG.getContext()), FF); - SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy()); + SDValue CPIdx = + DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); Alignment = std::min(Alignment, 4u); @@ -2699,7 +2716,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, /// Open code the operations for BSWAP of the specified operation. SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); - EVT SHVT = TLI.getShiftAmountTy(VT); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled Expand type in BSWAP!"); @@ -2756,7 +2773,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, default: llvm_unreachable("Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); - EVT ShVT = TLI.getShiftAmountTy(VT); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned Len = VT.getSizeInBits(); assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && @@ -2814,7 +2831,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // // Ref: "Hacker's Delight" by Henry Warren EVT VT = Op.getValueType(); - EVT ShVT = TLI.getShiftAmountTy(VT); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); @@ -2903,10 +2920,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__sync_synchronize", - TLI.getPointerTy()), std::move(Args), 0); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); @@ -3002,10 +3021,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("abort", TLI.getPointerTy()), - std::move(Args), 0); + 
CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("abort", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -3028,7 +3049,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // SAR. However, it is doubtful that any exist. EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); - EVT ShiftAmountTy = TLI.getShiftAmountTy(VT); + EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); if (VT.isVector()) ShiftAmountTy = VT; unsigned BitsDiff = VT.getScalarType().getSizeInBits() - @@ -3092,9 +3113,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = Node->getOperand(1); unsigned Align = Node->getConstantOperandVal(3); - SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), - false, false, false, 0); + SDValue VAListLoad = + DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -3111,10 +3132,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(TLI.getDataLayout()-> - getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), - dl, - VAList.getValueType())); + DAG.getConstant(DAG.getDataLayout().getTypeAllocSize( + VT.getTypeForEVT(*DAG.getContext())), + dl, VAList.getValueType())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, MachinePointerInfo(V), false, false, 0); @@ -3129,9 +3149,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // output, returning the chain. 
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); - Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), - Node->getOperand(2), MachinePointerInfo(VS), - false, false, false, 0); + Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, + Node->getOperand(0), Node->getOperand(2), + MachinePointerInfo(VS), false, false, false, 0); Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), MachinePointerInfo(VD), false, false, 0); Results.push_back(Tmp1); @@ -3226,14 +3246,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } unsigned Idx = Mask[i]; if (Idx < NumElems) - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Op0, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy()))); + Ops.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); else - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Op1, - DAG.getConstant(Idx - NumElems, dl, - TLI.getVectorIdxTy()))); + Ops.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, + DAG.getConstant(Idx - NumElems, dl, + TLI.getVectorIdxTy(DAG.getDataLayout())))); } Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); @@ -3247,8 +3267,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), - DAG.getConstant(OpTy.getSizeInBits()/2, dl, - TLI.getShiftAmountTy(Node->getOperand(0).getValueType()))); + DAG.getConstant(OpTy.getSizeInBits() / 2, dl, + TLI.getShiftAmountTy( + Node->getOperand(0).getValueType(), + DAG.getDataLayout()))); Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); } else { // 0 -> Lo @@ -3646,8 +3668,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); - SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), dl, - TLI.getShiftAmountTy(HalfType)); + SDValue Shift = + DAG.getConstant(HalfType.getSizeInBits(), dl, + TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); break; @@ -3759,12 +3782,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // The high part is obtained by SRA'ing all but one of the bits of low // part. unsigned LoSize = VT.getSizeInBits(); - SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy())); - SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(LoSize - 1, dl, - TLI.getPointerTy())); + SDValue HiLHS = + DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + SDValue HiRHS = + DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); // Here we're passing the 2 arguments explicitly as 4 arguments that are // pre-lowered to the correct types. 
This all depends upon WideVT not @@ -3785,8 +3810,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } if (isSigned) { - Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, dl, - TLI.getShiftAmountTy(BottomHalf.getValueType())); + Tmp1 = DAG.getConstant( + VT.getSizeInBits() - 1, dl, + TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); @@ -3802,9 +3828,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT PairTy = Node->getValueType(0); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); - Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, - DAG.getConstant(PairTy.getSizeInBits()/2, dl, - TLI.getShiftAmountTy(PairTy))); + Tmp2 = DAG.getNode( + ISD::SHL, dl, PairTy, Tmp2, + DAG.getConstant(PairTy.getSizeInBits() / 2, dl, + TLI.getShiftAmountTy(PairTy, DAG.getDataLayout()))); Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); break; } @@ -3828,9 +3855,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Table = Node->getOperand(1); SDValue Index = Node->getOperand(2); - EVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); - const DataLayout &TD = *TLI.getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); @@ -3936,7 +3963,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { assert(!TLI.isOperationExpand(ISD::SELECT, VT) && "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " "expanded."); - EVT CCVT = TLI.getSetCCResultType(*DAG.getContext(), CmpVT); + EVT CCVT = + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC); Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); break; @@ -4036,14 +4064,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Scalars; for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Sh = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -4114,9 +4140,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); - Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, - DAG.getConstant(DiffBits, dl, - TLI.getShiftAmountTy(NVT))); + Tmp1 = DAG.getNode( + ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); Results.push_back(Tmp1); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 
37fdf4453fd4..3c50a4155731 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -218,29 +218,35 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { unsigned RSize = RVT.getSizeInBits(); // First get the sign bit of second operand. - SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), - DAG.getConstant(RSize - 1, dl, - TLI.getShiftAmountTy(RVT))); + SDValue SignBit = DAG.getNode( + ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), + DAG.getConstant(RSize - 1, dl, + TLI.getShiftAmountTy(RVT, DAG.getDataLayout()))); SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); // Shift right or sign-extend it if the two operands have different types. int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); if (SizeDiff > 0) { - SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, - DAG.getConstant(SizeDiff, dl, - TLI.getShiftAmountTy(SignBit.getValueType()))); + SignBit = + DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); } else if (SizeDiff < 0) { SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); - SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, - DAG.getConstant(-SizeDiff, dl, - TLI.getShiftAmountTy(SignBit.getValueType()))); + SignBit = + DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); } // Clear the sign bit of the first operand. - SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), - DAG.getConstant(LSize - 1, dl, - TLI.getShiftAmountTy(LVT))); + SDValue Mask = DAG.getNode( + ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), + DAG.getConstant(LSize - 1, dl, + TLI.getShiftAmountTy(LVT, DAG.getDataLayout()))); Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT)); LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f41202c4f8a4..9f060a09a0f3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -282,7 +282,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { Lo = BitConvertToInteger(Lo); Hi = BitConvertToInteger(Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); InOp = DAG.getNode(ISD::ANY_EXTEND, dl, @@ -310,8 +310,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, dl, TLI.getShiftAmountTy(NVT))); + return DAG.getNode( + ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -799,7 +801,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { } // Handle endianness of the load. - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::reverse(Parts.begin(), Parts.end()); // Assemble the parts in the promoted type. 
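As a side note (illustrative, not part of this commit): the PromoteIntRes_VAARG code around this point loads the value in register-sized parts, reverses them on big-endian targets so the low part comes first, and then shifts each part into position and ORs it into the promoted result, as the following hunk shows. A plain C++ sketch of that assembly under the assumption of 32-bit parts and a 64-bit result, with a hypothetical function name:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical illustration of the parts assembly: reverse the loaded 32-bit
// parts on big-endian targets so the low part comes first, then shift each
// part into position and OR it into the result.
uint64_t assembleParts(std::vector<uint32_t> Parts, bool IsBigEndian) {
  assert(Parts.size() <= 2 && "sketch assumes at most a 64-bit result");
  if (IsBigEndian)
    std::reverse(Parts.begin(), Parts.end()); // low part must come first
  uint64_t Res = 0;
  for (size_t i = 0; i != Parts.size(); ++i)
    Res |= uint64_t(Parts[i]) << (i * 32);    // shift into position and OR in
  return Res;
}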
@@ -809,8 +811,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); // Shift it to the right position and "or" it in. Part = DAG.getNode(ISD::SHL, dl, NVT, Part, - DAG.getConstant(i*RegVT.getSizeInBits(), dl, - TLI.getPointerTy())); + DAG.getConstant(i * RegVT.getSizeInBits(), dl, + TLI.getPointerTy(DAG.getDataLayout()))); Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); } @@ -1004,7 +1006,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, DAG.getConstant(OVT.getSizeInBits(), dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); } @@ -1063,7 +1065,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, // Promote the index. SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N), - TLI.getVectorIdxTy()); + TLI.getVectorIdxTy(DAG.getDataLayout())); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Idx), 0); } @@ -1356,9 +1358,9 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, Node, false); } -/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded, +/// N is a shift by a value that needs to be expanded, /// and the shift amount is a constant 'Amt'. Expand the operation. -void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, +void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, SDValue &Lo, SDValue &Hi) { SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts @@ -1379,9 +1381,9 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, EVT ShTy = N->getOperand(1).getValueType(); if (N->getOpcode() == ISD::SHL) { - if (Amt > VTBits) { + if (Amt.ugt(VTBits)) { Lo = Hi = DAG.getConstant(0, DL, NVT); - } else if (Amt > NVTBits) { + } else if (Amt.ugt(NVTBits)) { Lo = DAG.getConstant(0, DL, NVT); Hi = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy)); @@ -1403,16 +1405,15 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, DAG.getNode(ISD::SHL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SRL, DL, NVT, InL, - DAG.getConstant(NVTBits - Amt, DL, ShTy))); + DAG.getConstant(-Amt + NVTBits, DL, ShTy))); } return; } if (N->getOpcode() == ISD::SRL) { - if (Amt > VTBits) { - Lo = DAG.getConstant(0, DL, NVT); - Hi = DAG.getConstant(0, DL, NVT); - } else if (Amt > NVTBits) { + if (Amt.ugt(VTBits)) { + Lo = Hi = DAG.getConstant(0, DL, NVT); + } else if (Amt.ugt(NVTBits)) { Lo = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy)); Hi = DAG.getConstant(0, DL, NVT); @@ -1424,19 +1425,19 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(NVTBits - Amt, DL, ShTy))); + DAG.getConstant(-Amt + NVTBits, DL, ShTy))); Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); } return; } assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); - if (Amt > VTBits) { + if (Amt.ugt(VTBits)) { Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits - 1, DL, ShTy)); - } else if (Amt > NVTBits) { + } else if (Amt.ugt(NVTBits)) { Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, - DAG.getConstant(Amt-NVTBits, DL, ShTy)); + DAG.getConstant(Amt - 
NVTBits, DL, ShTy)); Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(NVTBits - 1, DL, ShTy)); } else if (Amt == NVTBits) { @@ -1448,7 +1449,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, DAG.getNode(ISD::SRL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)), DAG.getNode(ISD::SHL, DL, NVT, InH, - DAG.getConstant(NVTBits - Amt, DL, ShTy))); + DAG.getConstant(-Amt + NVTBits, DL, ShTy))); Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); } } @@ -1808,7 +1809,8 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); // The high part replicates the sign bit of Lo, make it explicit. Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(NVTBits - 1, dl, TLI.getPointerTy())); + DAG.getConstant(NVTBits - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); } } @@ -1975,7 +1977,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // lo part. unsigned LoSize = Lo.getValueType().getSizeInBits(); Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy())); + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); } else if (ExtType == ISD::ZEXTLOAD) { // The high part is just a zero. Hi = DAG.getConstant(0, dl, NVT); @@ -1984,7 +1987,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // The high part is undefined. Hi = DAG.getUNDEF(NVT); } - } else if (TLI.isLittleEndian()) { + } else if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Alignment, @@ -2039,15 +2042,16 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (ExcessBits < NVT.getSizeInBits()) { // Transfer low bits from the bottom of Hi to the top of Lo. - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, Hi, - DAG.getConstant(ExcessBits, dl, - TLI.getPointerTy()))); + Lo = DAG.getNode( + ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, dl, + TLI.getPointerTy(DAG.getDataLayout())))); // Move high bits to the right position in Hi. - Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, - NVT, Hi, + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT, + Hi, DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); } } @@ -2173,7 +2177,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // If we can emit an efficient shift operation, do so now. Check to see if // the RHS is a constant. if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) - return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi); + return ExpandShiftByConstant(N, CN->getAPIntValue(), Lo, Hi); // If we can determine that the high bit of the shift is zero or one, even if // the low bits are variable, emit this shift in an optimized form. @@ -2206,7 +2210,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // have an illegal type. Fix that first by casting the operand, otherwise // the new SHL_PARTS operation would need further legalization. 
SDValue ShiftOp = N->getOperand(1); - EVT ShiftTy = TLI.getShiftAmountTy(VT); + EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); assert(ShiftTy.getScalarType().getSizeInBits() >= Log2_32_Ceil(VT.getScalarType().getSizeInBits()) && "ShiftAmountTy is too small to cover the range of this type!"); @@ -2276,8 +2280,9 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0)); // The high part is obtained by SRA'ing all but one of the bits of low part. unsigned LoSize = NVT.getSizeInBits(); - Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, - DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy())); + Hi = DAG.getNode( + ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); } else { // For example, extension of an i48 to an i64. The operand type necessarily // promotes to the result type, so will end up being expanded too. @@ -2312,7 +2317,7 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { // things like sextinreg V:i64 from i8. Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); } else { // For example, extension of an i48 to an i64. Leave the low part alone, // sext_inreg the high part. @@ -2355,10 +2360,10 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); - Hi = DAG.getNode(ISD::SRL, dl, - N->getOperand(0).getValueType(), N->getOperand(0), + Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), + N->getOperand(0), DAG.getConstant(NVT.getSizeInBits(), dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } @@ -2414,7 +2419,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, } Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); - EVT PtrVT = TLI.getPointerTy(); + EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); // Replace this with a libcall that will check overflow. @@ -2845,7 +2850,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Alignment, AAInfo); } - if (TLI.isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); @@ -2882,11 +2887,12 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Transfer high bits from the top of Lo to the bottom of Hi. Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, - TLI.getPointerTy())); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SRL, dl, NVT, Lo, - DAG.getConstant(ExcessBits, dl, - TLI.getPointerTy()))); + TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode( + ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SRL, dl, NVT, Lo, + DAG.getConstant(ExcessBits, dl, + TLI.getPointerTy(DAG.getDataLayout())))); } // Store both the high bits and maybe some of the low bits. @@ -2956,14 +2962,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. 
- SDValue FudgePtr = DAG.getConstantPool( - ConstantInt::get(*DAG.getContext(), FF.zext(64)), - TLI.getPointerTy()); + SDValue FudgePtr = + DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)), + TLI.getPointerTy(DAG.getDataLayout())); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. SDValue Zero = DAG.getIntPtrConstant(0, dl); SDValue Four = DAG.getIntPtrConstant(4, dl); - if (TLI.isBigEndian()) std::swap(Zero, Four); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Zero, Four); SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Zero, Four); unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); @@ -3113,9 +3120,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { for (unsigned i = 0; i < NumOperands; ++i) { SDValue Op = N->getOperand(i); for (unsigned j = 0; j < NumElem; ++j) { - SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InElemTy, Op, DAG.getConstant(j, dl, - TLI.getVectorIdxTy())); + SDValue Ext = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); } } @@ -3142,7 +3149,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); - SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, TLI.getVectorIdxTy()); + SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, + TLI.getVectorIdxTy(DAG.getDataLayout())); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, V0->getValueType(0).getScalarType(), V0, V1); @@ -3179,8 +3187,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, - Incoming, DAG.getConstant(i, dl, TLI.getVectorIdxTy())); + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 9c297698c1db..a7392fabf1e7 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1006,7 +1006,7 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index) { SDLoc dl(Index); // Make sure the index type is big enough to compute in. - Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy()); + Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout())); // Calculate the element offset and add it to the pointer. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. 
@@ -1030,7 +1030,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, DAG.getConstant(LVT.getSizeInBits(), dlHi, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } @@ -1079,7 +1079,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); @@ -1117,7 +1117,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, DAG.getConstant(LoVT.getSizeInBits(), dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 2f2778982611..d1131a74cf17 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -73,7 +73,7 @@ private: } EVT getSetCCResultType(EVT VT) const { - return TLI.getSetCCResultType(*DAG.getContext(), VT); + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } /// IgnoreNodeResults - Pretend all of this node's results are legal. @@ -167,7 +167,7 @@ private: SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); @@ -347,7 +347,7 @@ private: void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandShiftByConstant(SDNode *N, unsigned Amt, + void ExpandShiftByConstant(SDNode *N, const APInt &Amt, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 330c31ce0eec..14d8f7762086 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -60,18 +60,20 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeExpandInteger: - case TargetLowering::TypeExpandFloat: + case TargetLowering::TypeExpandFloat: { + auto &DL = DAG.getDataLayout(); // Convert the expanded pieces of the input. 
GetExpandedOp(InOp, Lo, Hi); - if (TLI.hasBigEndianPartOrdering(InVT) != - TLI.hasBigEndianPartOrdering(OutVT)) + if (TLI.hasBigEndianPartOrdering(InVT, DL) != + TLI.hasBigEndianPartOrdering(OutVT, DL)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + } case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); - if (TLI.hasBigEndianPartOrdering(OutVT)) + if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -88,7 +90,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); - if (TLI.hasBigEndianPartOrdering(OutVT)) + if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -119,9 +121,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) - Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, - CastInOp, DAG.getConstant(i, dl, - TLI.getVectorIdxTy()))); + Vals.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); // Build Lo, Hi pair by pairing extracted elements if needed. unsigned Slot = 0; @@ -131,7 +133,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHS = Vals[Slot]; SDValue RHS = Vals[Slot + 1]; - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(LHS, RHS); Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, @@ -143,7 +145,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { Lo = Vals[Slot++]; Hi = Vals[Slot++]; - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); return; @@ -155,9 +157,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. - unsigned Alignment = - TLI.getDataLayout()->getPrefTypeAlignment(NOutVT. - getTypeForEVT(*DAG.getContext())); + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment( + NOutVT.getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); @@ -182,7 +183,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. - if (TLI.hasBigEndianPartOrdering(OutVT)) + if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) std::swap(Lo, Hi); } @@ -241,7 +242,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, DAG.getConstant(1, dl, Idx.getValueType())); Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); } @@ -282,7 +283,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, Hi.getValue(1)); // Handle endianness of the load. 
- if (TLI.hasBigEndianPartOrdering(ValueVT)) + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -302,7 +303,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load. - if (TLI.hasBigEndianPartOrdering(OVT)) + if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout())) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -325,7 +326,7 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, if (NumElements > 1) { NumElements >>= 1; SplitInteger(Op, Parts[0], Parts[1]); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Parts[0], Parts[1]); IntegerToVector(Parts[0], NumElements, Ops, EltVT); IntegerToVector(Parts[1], NumElements, Ops, EltVT); @@ -389,7 +390,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i < NumElts; ++i) { SDValue Lo, Hi; GetExpandedOp(N->getOperand(i), Lo, Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); NewElts.push_back(Lo); NewElts.push_back(Hi); @@ -431,7 +432,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { SDValue Lo, Hi; GetExpandedOp(Val, Lo, Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); SDValue Idx = N->getOperand(2); @@ -481,7 +482,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDValue Lo, Hi; GetExpandedOp(St->getValue(), Lo, Hi); - if (TLI.hasBigEndianPartOrdering(ValueVT)) + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index ee844a8a4c58..83d4ad5ea1f4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -503,7 +503,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { // Instead, we load all significant words, mask bits off, and concatenate // them to form each element. Finally, they are extended to destination // scalar type to build the destination vector. 
- EVT WideVT = TLI.getPointerTy(); + EVT WideVT = TLI.getPointerTy(DAG.getDataLayout()); assert(WideVT.isRound() && "Could not handle the sophisticated case when the widest integer is" @@ -563,7 +563,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue Lo, Hi, ShAmt; if (BitOffset < WideBits) { - ShAmt = DAG.getConstant(BitOffset, dl, TLI.getShiftAmountTy(WideVT)); + ShAmt = DAG.getConstant( + BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); } @@ -573,8 +574,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { WideIdx++; BitOffset -= WideBits; if (BitOffset > 0) { - ShAmt = DAG.getConstant(SrcEltBits - BitOffset, dl, - TLI.getShiftAmountTy(WideVT)); + ShAmt = DAG.getConstant( + SrcEltBits - BitOffset, dl, + TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); } @@ -592,8 +594,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); break; case ISD::SEXTLOAD: - ShAmt = DAG.getConstant(WideBits - SrcEltBits, dl, - TLI.getShiftAmountTy(WideVT)); + ShAmt = + DAG.getConstant(WideBits - SrcEltBits, dl, + TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); @@ -663,8 +666,9 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { // and save them into memory individually. SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Value, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, @@ -803,7 +807,7 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { // Place the extended lanes into the correct locations. int ExtLaneScale = NumSrcElements / NumElements; - int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; for (int i = 0; i < NumElements; ++i) ShuffleMask[i * ExtLaneScale + EndianOffset] = i; @@ -858,7 +862,7 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { ShuffleMask.push_back(i); int ExtLaneScale = NumSrcElements / NumElements; - int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + int EndianOffset = DAG.getDataLayout().isBigEndian() ? 
ExtLaneScale - 1 : 0; for (int i = 0; i < NumElements; ++i) ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; @@ -995,12 +999,15 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { SDLoc dl(Op); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { - SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy())); - SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy())); + SDValue LHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue RHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); Ops[i] = DAG.getNode(ISD::SETCC, dl, - TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), + TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getConstant(APInt::getAllOnesValue diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 905492c202ca..4348ab79f7d1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -251,8 +251,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { Op = GetScalarizedVector(Op); } else { EVT VT = OpVT.getVectorElementType(); - Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + Op = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } @@ -384,10 +385,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { RHS = GetScalarizedVector(RHS); } else { EVT VT = OpVT.getVectorElementType(); - LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); - RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + LHS = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + RHS = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Turn it into a scalar SETCC. @@ -742,7 +745,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // expanded pieces. if (LoVT == HiVT) { GetExpandedOp(InOp, Lo, Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); @@ -761,12 +764,12 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // In the general case, convert the input to an integer and split it by hand. 
EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(LoIntVT, HiIntVT); SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); @@ -819,7 +822,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl, - TLI.getVectorIdxTy())); + TLI.getVectorIdxTy(DAG.getDataLayout()))); } void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, @@ -840,7 +843,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // Store the new subvector into the specified index. SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType); + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(), false, false, 0); @@ -898,9 +901,10 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Lo.getValueType(), Lo, Elt, Idx); else - Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getConstant(IdxVal - LoNumElts, dl, - TLI.getVectorIdxTy())); + Hi = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getConstant(IdxVal - LoNumElts, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); return; } @@ -919,8 +923,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // so use a truncating store. SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = - TLI.getDataLayout()->getPrefTypeAlignment(VecType); + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, false, false, 0); @@ -1292,10 +1295,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, Idx -= Input * NewElts; // Extract the vector element by hand. - SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy()))); + SVOps.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input], + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); } // Construct the Lo/Hi output using a BUILD_VECTOR. 
@@ -1472,7 +1474,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { Lo = BitConvertToInteger(Lo); Hi = BitConvertToInteger(Hi); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), @@ -1763,9 +1765,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (const SDValue &Op : N->op_values()) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { - Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - Op, DAG.getConstant(i, DL, TLI.getVectorIdxTy()))); - + Elts.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); } } @@ -1829,10 +1831,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. - return IsFloat ? - DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, - DAG.getTargetConstant(0, DL, TLI.getPointerTy())) : - DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); + return IsFloat + ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, + DAG.getTargetConstant( + 0, DL, TLI.getPointerTy(DAG.getDataLayout()))) + : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); } SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { @@ -2062,12 +2065,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { // } while (CurNumElts != 0) { while (CurNumElts >= NumElts) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy())); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy())); + SDValue EOp1 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); Idx += NumElts; CurNumElts -= NumElts; @@ -2079,14 +2082,12 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy())); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy())); + SDValue EOp1 = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp2 = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); } @@ -2123,9 +2124,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { SDValue VecOp = DAG.getUNDEF(NextVT); unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], - DAG.getConstant(i, dl, TLI.getVectorIdxTy())); + VecOp = DAG.getNode( + ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx], + 
DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -2211,8 +2212,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } if (InVTNumElts % WidenNumElts == 0) { - SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + SDValue InVal = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -2226,8 +2228,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; for (i=0; i < MinElts; ++i) { - SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getConstant(i, DL, TLI.getVectorIdxTy())); + SDValue Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -2453,8 +2456,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { if (InputWidened) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, dl, TLI.getVectorIdxTy())); + Ops[Idx++] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -2511,8 +2515,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { // Extract the input and convert the shorten input vector. 
- InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + InOp = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2527,8 +2532,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { unsigned MinElts = std::min(InVTNumElts, WidenNumElts); unsigned i; for (i=0; i < MinElts; ++i) { - SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy())); + SDValue ExtVal = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2570,8 +2576,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned NumElts = VT.getVectorNumElements(); unsigned i; for (i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal + i, dl, TLI.getVectorIdxTy())); + Ops[i] = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(IdxVal + i, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -2872,12 +2880,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && "We can't have the same type as we started with!"); if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) - InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, - DAG.getUNDEF(FixedVT), InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + InOp = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); else - InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + InOp = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); break; } } @@ -2920,10 +2929,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); SmallVector<SDValue, 16> Ops(NumElts); for (unsigned i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode(Opcode, dl, EltVT, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, - TLI.getVectorIdxTy()))); + Ops[i] = DAG.getNode( + Opcode, dl, EltVT, + DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -2943,8 +2953,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + return DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } } @@ -2971,8 +2982,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, dl, TLI.getVectorIdxTy())); + Ops[Idx++] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } @@ -3053,7 +3065,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { // Get a new SETCC node to compare the newly widened operands. // Only some of the compared elements are legal. - EVT SVT = TLI.getSetCCResultType(*DAG.getContext(), InOp0.getValueType()); + EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + InOp0.getValueType()); SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), SVT, InOp0, InOp1, N->getOperand(2)); @@ -3061,9 +3074,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { EVT ResVT = EVT::getVectorVT(*DAG.getContext(), SVT.getVectorElementType(), N->getValueType(0).getVectorNumElements()); - SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - ResVT, WideSETCC, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + SDValue CC = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -3159,8 +3172,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); LdTy = NewLdTy; } - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy())); + VecOp = DAG.getNode( + ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], + DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } @@ -3407,9 +3421,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, if (NewVT.isVector()) { unsigned NumVTElts = NewVT.getVectorNumElements(); do { - SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy())); + SDValue EOp = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -3429,8 +3443,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, // Readjust index position based on new vector type Idx = Idx * ValEltWidth / NewVTWidth; do { - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy())); + SDValue EOp = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, + DAG.getConstant(Idx++, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, @@ -3476,8 +3492,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, EVT ValEltVT = ValVT.getVectorElementType(); unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + SDValue EOp = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), 
StEltVT, isVolatile, isNonTemporal, Align, @@ -3488,8 +3505,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, BasePtr, DAG.getConstant(Offset, dl, BasePtr.getValueType())); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + SDValue EOp = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, @@ -3525,8 +3543,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { } if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy())); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); // Fall back to extract and build. SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -3534,8 +3553,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { unsigned MinNumElts = std::min(WidenNumElts, InNumElts); unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) - Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy())); + Ops[Idx] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 00cbae3986cd..34e1a7001082 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -725,9 +725,8 @@ void ScheduleDAGLinearize::Schedule() { SmallVector<SDNode*, 8> Glues; unsigned DAGSize = 0; - for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(), - E = DAG->allnodes_end(); I != E; ++I) { - SDNode *N = I; + for (SDNode &Node : DAG->allnodes()) { + SDNode *N = &Node; // Use node id to record degree. unsigned Degree = N->use_size(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index b22d6edd85af..2a6c853a1d11 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -289,9 +289,8 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { /// ClusterNodes - Cluster certain nodes which should be scheduled together. /// void ScheduleDAGSDNodes::ClusterNodes() { - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { - SDNode *Node = &*NI; + for (SDNode &NI : DAG->allnodes()) { + SDNode *Node = &NI; if (!Node || !Node->isMachineOpcode()) continue; @@ -308,9 +307,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // to their associated SUnits by holding SUnits table indices. A value // of -1 means the SDNode does not yet have an associated SUnit. 
unsigned NumNodes = 0; - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { - NI->setNodeId(-1); + for (SDNode &NI : DAG->allnodes()) { + NI.setNodeId(-1); ++NumNodes; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index be5478275f99..14f44ccc60ce 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -151,8 +151,8 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; bool IsAllUndef = true; - for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) + for (const SDValue &Op : N->op_values()) { + if (Op.getOpcode() == ISD::UNDEF) continue; IsAllUndef = false; // Do not accept build_vectors that aren't all constants or which have non-0 @@ -163,12 +163,11 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { // We only want to check enough bits to cover the vector elements, because // we care if the resultant vector is all zeros, not whether the individual // constants are. - SDValue Zero = N->getOperand(i); unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) { if (CN->getAPIntValue().countTrailingZeros() < EltSize) return false; - } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) { if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize) return false; } else @@ -921,7 +920,7 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TLI->getDataLayout()->getABITypeAlignment(Ty); + return getDataLayout().getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... @@ -1184,7 +1183,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, // EltParts is currently in little endian order. If we actually want // big-endian order then reverse it now. - if (TLI->isBigEndian()) + if (getDataLayout().isBigEndian()) std::reverse(EltParts.begin(), EltParts.end()); // The elements must be reversed when the element order is different @@ -1234,7 +1233,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, } SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) { - return getConstant(Val, DL, TLI->getPointerTy(), isTarget); + return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); } SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT, @@ -1303,7 +1302,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, "Cannot set target flags on target-independent globals"); // Truncate (with sign-extension) the offset value to the pointer size. 
- unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType()); + unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); @@ -1373,7 +1372,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1400,7 +1399,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1850,7 +1849,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - EVT ShTy = TLI->getShiftAmountTy(LHSTy); + EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -1864,10 +1863,10 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); unsigned StackAlign = - std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign); + std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } /// CreateStackTemporary - Create a stack temporary suitable for holding @@ -1877,13 +1876,13 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const DataLayout *TD = TLI->getDataLayout(); - unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), - TD->getPrefTypeAlignment(Ty2)); + const DataLayout &DL = getDataLayout(); + unsigned Align = + std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2)); MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, @@ -1916,9 +1915,9 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, break; } - if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) { + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { const APInt &C2 = N2C->getAPIntValue(); - if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { const APInt &C1 = N1C->getAPIntValue(); switch (Cond) { @@ -1936,8 +1935,8 @@ SDValue 
SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } } } - if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) { - if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) { + if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) { + if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) { APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); switch (Cond) { default: break; @@ -2356,15 +2355,24 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. + // Output known-0 bits are also known if the top bits of each input are + // known to be clear. For example, if one input has the top 10 bits clear + // and the other has the top 8 bits clear, we know the top 7 bits of the + // output must be clear. computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + unsigned KnownZeroHigh = KnownZero2.countLeadingOnes(); + unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); - KnownZeroOut = std::min(KnownZeroOut, + KnownZeroHigh = std::min(KnownZeroHigh, + KnownZero2.countLeadingOnes()); + KnownZeroLow = std::min(KnownZeroLow, KnownZero2.countTrailingOnes()); if (Op.getOpcode() == ISD::ADD) { - KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow); + if (KnownZeroHigh > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1); break; } @@ -2372,8 +2380,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, // information if we know (at least) that the low two bits are clear. We // then return to the caller that the low bit is unknown but that other bits // are known zero. - if (KnownZeroOut >= 2) // ADDE - KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut); + if (KnownZeroLow >= 2) // ADDE + KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow); break; } case ISD::SREM: @@ -2814,7 +2822,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, // doesn't create new constants with different values. Nevertheless, the // opaque flag is preserved during folding to prevent future folding with // other constants. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) { const APInt &Val = C->getAPIntValue(); switch (Opcode) { default: break; @@ -2861,7 +2869,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, } // Constant fold unary operations with a floating point constant operand. - if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) { + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) { APFloat V = C->getValueAPF(); // make copy switch (Opcode) { case ISD::FNEG: @@ -2922,7 +2930,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, } // Constant fold unary operations with a vector integer or float operand. 
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand.getNode())) { + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) { if (BV->isConstant()) { switch (Opcode) { default: @@ -3278,8 +3286,8 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags *Flags) { - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); - ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); switch (Opcode) { default: break; case ISD::TokenFactor: @@ -3499,7 +3507,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, Ops.push_back(Op); continue; } - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode())) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { APInt Val = C->getAPIntValue(); Ops.push_back(SignExtendInReg(Val)); continue; @@ -3554,7 +3562,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // if the indices are known different, extract the element from // the original vector. SDValue N1Op2 = N1.getOperand(2); - ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode()); + ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2); if (N1Op2C && N2C) { if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { @@ -3600,9 +3608,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); - if (isa<ConstantSDNode>(Index.getNode())) { + if (isa<ConstantSDNode>(Index)) { assert((VT.getVectorNumElements() + - cast<ConstantSDNode>(Index.getNode())->getZExtValue() + cast<ConstantSDNode>(Index)->getZExtValue() <= N1.getValueType().getVectorNumElements()) && "Extract subvector overflow!"); } @@ -3628,8 +3636,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // Constant fold FP operations. bool HasFPExceptions = TLI->hasFloatingPointExceptions(); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); - ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); if (N1CFP) { if (!N2CFP && isCommutativeBinOp(Opcode)) { // Canonicalize constant to RHS if commutative. @@ -3787,7 +3795,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. 
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); switch (Opcode) { case ISD::FMA: { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); @@ -3845,9 +3853,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, "Dest and insert subvector source types must match!"); assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && "Insert subvector must be from smaller vector to larger vector!"); - if (isa<ConstantSDNode>(Index.getNode())) { + if (isa<ConstantSDNode>(Index)) { assert((N2.getValueType().getVectorNumElements() + - cast<ConstantSDNode>(Index.getNode())->getZExtValue() + cast<ConstantSDNode>(Index)->getZExtValue() <= VT.getVectorNumElements()) && "Insert subvector overflow!"); } @@ -3994,7 +4002,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); APInt Val(NumVTBits, 0); - if (TLI.isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { for (unsigned i = 0; i != NumBytes; ++i) Val |= (uint64_t)(unsigned char)Str[i] << i*8; } else { @@ -4066,9 +4074,9 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, if (VT == MVT::Other) { unsigned AS = 0; - if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || + if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(AS) || TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) { - VT = TLI.getPointerTy(); + VT = TLI.getPointerTy(DAG.getDataLayout()); } else { switch (DstAlign & 7) { case 0: VT = MVT::i64; break; @@ -4185,14 +4193,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Align && - TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign)) + while (NewAlign > Align && + DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign)) NewAlign /= 2; if (NewAlign > Align) { @@ -4294,7 +4302,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -4385,7 +4393,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -4488,19 +4496,21 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // Emit a library call. 
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), - Type::getVoidTy(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), - TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult() - .setTailCall(isTailCall); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy(getDataLayout())), + std::move(Args), 0) + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4544,19 +4554,21 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), - Type::getVoidTy(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), - TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult() - .setTailCall(isTailCall); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy(getDataLayout())), + std::move(Args), 0) + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4594,7 +4606,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, } // Emit a library call. - Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); + Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -4608,13 +4620,15 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), - Type::getVoidTy(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult() - .setTailCall(isTailCall); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy(getDataLayout())), + std::move(Args), 0) + .setDiscardResult() + .setTailCall(isTailCall); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -6656,7 +6670,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const { /// isOnlyUserOf - Return true if this node is the only use of N. 
/// -bool SDNode::isOnlyUserOf(SDNode *N) const { +bool SDNode::isOnlyUserOf(const SDNode *N) const { bool Seen = false; for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { SDNode *User = *I; @@ -6671,16 +6685,16 @@ bool SDNode::isOnlyUserOf(SDNode *N) const { /// isOperand - Return true if this node is an operand of N. /// -bool SDValue::isOperandOf(SDNode *N) const { +bool SDValue::isOperandOf(const SDNode *N) const { for (const SDValue &Op : N->op_values()) if (*this == Op) return true; return false; } -bool SDNode::isOperandOf(SDNode *N) const { - for (unsigned i = 0, e = N->NumOperands; i != e; ++i) - if (this == N->OperandList[i].getNode()) +bool SDNode::isOperandOf(const SDNode *N) const { + for (const SDValue &Op : N->op_values()) + if (this == Op.getNode()) return true; return false; } @@ -6784,10 +6798,9 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { if (OperandVT.isVector()) { // A vector operand; extract a single element. EVT OperandEltVT = OperandVT.getVectorElementType(); - Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, - OperandEltVT, - Operand, - getConstant(i, dl, TLI->getVectorIdxTy())); + Operands[j] = + getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, + getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout()))); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -6891,10 +6904,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const GlobalValue *GV; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); + unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne, - *TLI->getDataLayout()); + getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 
1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6950,10 +6963,10 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, "More vector elements requested than available!"); SDValue Lo, Hi; Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, - getConstant(0, DL, TLI->getVectorIdxTy())); + getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, getConstant(LoVT.getVectorNumElements(), DL, - TLI->getVectorIdxTy())); + TLI->getVectorIdxTy(getDataLayout()))); return std::make_pair(Lo, Hi); } @@ -6965,7 +6978,7 @@ void SelectionDAG::ExtractVectorElements(SDValue Op, Count = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - EVT IdxTy = TLI->getVectorIdxTy(); + EVT IdxTy = TLI->getVectorIdxTy(getDataLayout()); SDLoc SL(Op); for (unsigned i = Start, e = Start + Count; i != e; ++i) { Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, @@ -7080,14 +7093,12 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { ConstantSDNode * BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { - return dyn_cast_or_null<ConstantSDNode>( - getSplatValue(UndefElements).getNode()); + return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements)); } ConstantFPSDNode * BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { - return dyn_cast_or_null<ConstantFPSDNode>( - getSplatValue(UndefElements).getNode()); + return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); } bool BuildVectorSDNode::isConstant() const { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4897082f89aa..2c3c0eb101a0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -146,7 +146,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); } - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); @@ -160,13 +160,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, // Combine the round and odd parts. Lo = Val; - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); - Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, - DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, - TLI.getPointerTy())); + Hi = + DAG.getNode(ISD::SHL, DL, TotalVT, Hi, + DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, + TLI.getPointerTy(DAG.getDataLayout()))); Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); } @@ -177,7 +178,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); - if (TLI.hasBigEndianPartOrdering(ValueVT)) + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { @@ -211,8 +212,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { // FP_ROUND's are always exact here. 
if (ValueVT.bitsLT(Val.getValueType())) - return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, - DAG.getTargetConstant(1, DL, TLI.getPointerTy())); + return DAG.getNode( + ISD::FP_ROUND, DL, ValueVT, Val, + DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()))); return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } @@ -305,8 +307,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Vector/Vector bitcast. @@ -362,10 +365,10 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, if (ValueVT.isVector()) return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; - assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && + "Copying to an illegal type!"); if (NumParts == 0) return; @@ -433,7 +436,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, DAG.getIntPtrConstant(RoundBits, DL)); getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) // The odd parts were reversed by getCopyToParts - unreverse them. std::reverse(Parts + RoundParts, Parts + NumParts); @@ -468,7 +471,7 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, } } - if (TLI.isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) std::reverse(Parts, Parts + OrigNumParts); } @@ -497,9 +500,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, // undef elements. SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getConstant(i, DL, - TLI.getVectorIdxTy()))); + Ops.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) @@ -524,9 +527,9 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, // Vector -> scalar conversion. assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); - Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy())); + Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? 
ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -554,14 +557,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) - Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, - IntermediateVT, Val, - DAG.getConstant(i * (NumElements / NumIntermediates), DL, - TLI.getVectorIdxTy())); + Ops[i] = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, + DAG.getConstant(i * (NumElements / NumIntermediates), DL, + TLI.getVectorIdxTy(DAG.getDataLayout()))); else - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntermediateVT, Val, - DAG.getConstant(i, DL, TLI.getVectorIdxTy())); + Ops[i] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); } // Split the intermediate operands into legal parts. @@ -588,14 +591,14 @@ RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} -RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, Type *Ty) { - ComputeValueVTs(tli, Ty, ValueVTs); +RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL, unsigned Reg, Type *Ty) { + ComputeValueVTs(TLI, DL, Ty, ValueVTs); for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); - MVT RegisterVT = tli.getRegisterType(Context, ValueVT); + unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); + MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); @@ -796,7 +799,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { // If we clobbered the stack pointer, MFI should know about it. 
assert(DAG.getMachineFunction().getFrameInfo()-> - hasInlineAsmWithSPAdjust()); + hasOpaqueSPAdjustment()); } } } @@ -807,7 +810,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getTarget().getDataLayout(); + DL = &DAG.getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -964,8 +967,8 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, - Ty); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), InReg, Ty); SDValue Chain = DAG.getEntryNode(); Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, Result); @@ -1031,7 +1034,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const Constant *C = dyn_cast<Constant>(V)) { - EVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) return DAG.getConstant(*CI, getCurSDLoc(), VT); @@ -1041,7 +1044,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<ConstantPointerNull>(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(AS)); + return DAG.getConstant(0, getCurSDLoc(), + TLI.getPointerTy(DAG.getDataLayout(), AS)); } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) @@ -1095,7 +1099,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, C->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1127,7 +1131,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); - EVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = + TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1147,13 +1152,15 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + return DAG.getFrameIndex(SI->second, + TLI.getPointerTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. 
if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, + Inst->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1163,6 +1170,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto &DL = DAG.getDataLayout(); SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; SmallVector<SDValue, 8> OutVals; @@ -1175,7 +1183,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1183,7 +1191,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); @@ -1203,7 +1211,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -1692,7 +1700,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1723,9 +1731,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy()); + SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout())); - unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); + unsigned JumpTableReg = + FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout())); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1733,11 +1742,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the largest // case in the switch. 
- SDValue CMP = - DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), - ISD::SETUGT); + SDValue CMP = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT); SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, CMP, @@ -1762,7 +1770,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // First create the loads to the guard/stack slot for the comparison. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI->getStackProtectorIndex(); @@ -1771,8 +1779,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue GuardPtr = getValue(IRGuard); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); - unsigned Align = - TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType()); SDValue Guard; SDLoc dl = getCurSDLoc(); @@ -1799,10 +1806,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, EVT VT = Guard.getValueType(); SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); - SDValue Cmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); + SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. @@ -1848,10 +1855,10 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, // Check range const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue RangeCmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); + SDValue RangeCmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); // Determine the type of the test operands. bool UsePtrType = false; @@ -1867,7 +1874,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } } if (UsePtrType) { - VT = TLI.getPointerTy(); + VT = TLI.getPointerTy(DAG.getDataLayout()); Sub = DAG.getZExtOrTrunc(Sub, dl, VT); } @@ -1909,13 +1916,15 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. Cmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), ISD::SETEQ); + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), + ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. 
Cmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, - DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), ISD::SETNE); + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), + ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, @@ -1924,8 +1933,9 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Emit bit tests and jumps SDValue AndOp = DAG.getNode(ISD::AND, dl, VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); - Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, - DAG.getConstant(0, dl, VT), ISD::SETNE); + Cmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -2013,7 +2023,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { SmallVector<EVT, 2> ValueVTs; SDLoc dl = getCurSDLoc(); - ComputeValueVTs(TLI, LP.getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been @@ -2022,14 +2032,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { if (FuncInfo.ExceptionPointerVirtReg) { Ops[0] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), dl, - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + FuncInfo.ExceptionPointerVirtReg, + TLI.getPointerTy(DAG.getDataLayout())), dl, ValueVTs[0]); } else { - Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy()); + Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())); } Ops[1] = DAG.getZExtOrTrunc( DAG.getCopyFromReg(DAG.getEntryNode(), dl, - FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), + FuncInfo.ExceptionSelectorVirtReg, + TLI.getPointerTy(DAG.getDataLayout())), dl, ValueVTs[1]); // Merge into one. @@ -2038,28 +2050,6 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { setValue(&LP, Res); } -unsigned -SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, - MachineBasicBlock *LPadBB) { - SDValue Chain = getControlRoot(); - SDLoc dl = getCurSDLoc(); - - // Get the typeid that we will dispatch on later. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); - unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); - SDValue Sel = DAG.getConstant(TypeID, dl, TLI.getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, VReg, Sel); - - // Branch to the main landing pad block. 
- MachineBasicBlock *ClauseMBB = FuncInfo.MBB; - ClauseMBB->addSuccessor(LPadBB); - DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, Chain, - DAG.getBasicBlock(LPadBB))); - return VReg; -} - void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { #ifndef NDEBUG for (const CaseCluster &CC : Clusters) @@ -2186,8 +2176,8 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - EVT ShiftTy = - DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( + Op2.getValueType(), DAG.getDataLayout()); // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -2256,7 +2246,8 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } @@ -2271,13 +2262,15 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { ISD::CondCode Condition = getFCmpCondCode(predicate); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs); + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), + ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2336,7 +2329,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) { void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } @@ -2344,7 +2338,8 @@ void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2352,7 +2347,8 @@ void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2361,43 +2357,49 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) { SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, - DAG.getTargetConstant(0, dl, TLI.getPointerTy()))); + DAG.getTargetConstant( + 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); } void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } @@ -2405,7 +2407,8 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } @@ -2413,14 +2416,16 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. 
SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); SDLoc dl = getCurSDLoc(); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. @@ -2442,7 +2447,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); SDValue N = getValue(SV); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); @@ -2457,19 +2462,21 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), - getCurSDLoc(), TLI.getVectorIdxTy()); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), + TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), - TLI.getValueType(I.getType()), InVec, InVal, InIdx)); + TLI.getValueType(DAG.getDataLayout(), I.getType()), + InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), - getCurSDLoc(), TLI.getVectorIdxTy()); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), + TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - TLI.getValueType(I.getType()), InVec, InIdx)); + TLI.getValueType(DAG.getDataLayout(), I.getType()), + InVec, InIdx)); } // Utility for visitShuffleVector - Return true if every element in Mask, @@ -2492,7 +2499,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { unsigned MaskNumElts = Mask.size(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -2614,7 +2621,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDLoc dl = getCurSDLoc(); Src = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, Src, - DAG.getConstant(StartIdx[Input], dl, TLI.getVectorIdxTy())); + DAG.getConstant(StartIdx[Input], dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); } } @@ -2641,7 +2649,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. 
EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = TLI.getVectorIdxTy(); + EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); SDLoc dl = getCurSDLoc(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { @@ -2676,9 +2684,9 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> AggValueVTs; - ComputeValueVTs(TLI, AggTy, AggValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); @@ -2722,7 +2730,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -2755,6 +2763,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); + // Normalize Vector GEP - all scalar operands should be converted to the + // splat vector. + unsigned VectorWidth = I.getType()->isVectorTy() ? + cast<VectorType>(I.getType())->getVectorNumElements() : 0; + + if (VectorWidth && !N.getValueType().isVector()) { + MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth); + SmallVector<SDValue, 16> Ops(VectorWidth, N); + N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { const Value *Idx = *OI; @@ -2770,16 +2788,25 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = StTy->getElementType(Field); } else { Ty = cast<SequentialType>(Ty)->getElementType(); - MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS); + MVT PtrTy = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); unsigned PtrSize = PtrTy.getSizeInBits(); APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); - // If this is a constant subscript, handle it quickly. - if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + // If this is a scalar constant or a splat vector of constants, + // handle it quickly. + const auto *CI = dyn_cast<ConstantInt>(Idx); + if (!CI && isa<ConstantDataVector>(Idx) && + cast<ConstantDataVector>(Idx)->getSplatValue()) + CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue()); + + if (CI) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); - SDValue OffsVal = DAG.getConstant(Offs, dl, PtrTy); + SDValue OffsVal = VectorWidth ? + DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : + DAG.getConstant(Offs, dl, PtrTy); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal); continue; } @@ -2787,6 +2814,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // N = N + Idx * ElementSize; SDValue IdxN = getValue(Idx); + if (!IdxN.getValueType().isVector() && VectorWidth) { + MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); + SmallVector<SDValue, 16> Ops(VectorWidth, IdxN); + IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } // If the index is smaller or larger than intptr_t, truncate or extend // it. 
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); @@ -2823,14 +2855,14 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDLoc dl = getCurSDLoc(); Type *Ty = I.getAllocatedType(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), - I.getAlignment()); + std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); @@ -2898,7 +2930,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2975,8 +3007,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(), - ValueVTs, &Offsets); + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), + SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3077,9 +3109,10 @@ static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index, else if (SDB->findValue(ShuffleInst)) { SDValue ShuffleNode = SDB->getValue(ShuffleInst); SDLoc sdl = ShuffleNode; - Base = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, - ShuffleNode.getValueType().getScalarType(), ShuffleNode, - DAG.getConstant(0, sdl, TLI.getVectorIdxTy())); + Base = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, sdl, + ShuffleNode.getValueType().getScalarType(), ShuffleNode, + DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout()))); SDB->setValue(Ptr, Base); } else @@ -3126,7 +3159,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); if (!UniformBase) { - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); } SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; @@ -3146,7 +3179,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDValue Mask = getValue(I.getArgOperand(2)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3184,7 +3217,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Mask = getValue(I.getArgOperand(2)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); if (!Alignment) Alignment = DAG.getEVTAlignment(VT); @@ -3214,7 +3247,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { Alignment, AAInfo, Ranges); if (!UniformBase) 
{ - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy()); + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); } SDValue Ops[] = { Root, Src0, Mask, Base, Index }; @@ -3291,8 +3324,10 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), dl, TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), dl, TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), dl, + TLI.getPointerTy(DAG.getDataLayout())); + Ops[2] = DAG.getConstant(I.getSynchScope(), dl, + TLI.getPointerTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3304,7 +3339,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3339,7 +3374,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT VT = TLI.getValueType(I.getValueOperand()->getType()); + EVT VT = + TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); @@ -3382,7 +3418,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. 
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3391,7 +3427,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3425,7 +3461,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { - EVT VT = TLI.getValueType(PTy); + EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } @@ -3458,8 +3494,9 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, SDLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, dl, MVT::i32)); - SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, - DAG.getConstant(23, dl, TLI.getPointerTy())); + SDValue t1 = DAG.getNode( + ISD::SRL, dl, MVT::i32, t0, + DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout()))); SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, DAG.getConstant(127, dl, MVT::i32)); return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); @@ -3484,7 +3521,8 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, // IntegerPartOfX <<= 23; IntegerPartOfX = DAG.getNode( ISD::SHL, dl, MVT::i32, IntegerPartOfX, - DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy())); + DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy( + DAG.getDataLayout()))); SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { @@ -4071,11 +4109,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return nullptr; case Intrinsic::vacopy: visitVACopy(I); return nullptr; case Intrinsic::returnaddress: - setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::frameaddress: - setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::read_register: { @@ -4083,7 +4123,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Chain = getRoot(); SDValue RegName = DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); Res = DAG.getNode(ISD::READ_REGISTER, sdl, DAG.getVTList(VT, MVT::Other), Chain, RegName); setValue(&I, Res); @@ -4335,14 +4375,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::eh_dwarf_cfa: { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI.getPointerTy()); + TLI.getPointerTy(DAG.getDataLayout())); SDValue Offset = DAG.getNode(ISD::ADD, sdl, CfaArg.getValueType(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, CfaArg.getValueType()), CfaArg); - SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), - DAG.getConstant(0, sdl, TLI.getPointerTy())); + SDValue FA = DAG.getNode( + ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), + DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); setValue(&I, 
DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -4444,7 +4485,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, sdl, MVT::i32), @@ -4474,7 +4515,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), @@ -4564,7 +4605,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)))); return nullptr; case Intrinsic::fmuladd: { - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && TLI.isFMAFasterThanFMulAndFAdd(VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, @@ -4593,10 +4634,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MVT::i32)))); return nullptr; case Intrinsic::convert_from_fp16: - setValue(&I, - DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()), - DAG.getNode(ISD::BITCAST, sdl, MVT::f16, - getValue(I.getArgOperand(0))))); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, + TLI.getValueType(DAG.getDataLayout(), I.getType()), + DAG.getNode(ISD::BITCAST, sdl, MVT::f16, + getValue(I.getArgOperand(0))))); return nullptr; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); @@ -4640,8 +4681,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::stacksave: { SDValue Op = getRoot(); - Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op); + Res = DAG.getNode( + ISD::STACKSAVE, sdl, + DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; @@ -4655,7 +4697,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Emit code into the DAG to store the stack guard onto the stack. 
MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); @@ -4753,7 +4795,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, - TLI.getPointerTy(), + TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; } @@ -4794,10 +4836,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { TargetLowering::ArgListTy Args; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()) - .setCallee(CallingConv::C, I.getType(), - DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), - std::move(Args), 0); + CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee( + CallingConv::C, I.getType(), + DAG.getExternalSymbol(TrapFuncName.data(), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); @@ -4873,7 +4916,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + Ops[1] = + DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -4883,7 +4927,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); return nullptr; case Intrinsic::invariant_end: // Discard region information. @@ -4903,7 +4947,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); case Intrinsic::eh_actions: - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); return nullptr; case Intrinsic::donothing: // ignore @@ -4935,11 +4979,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - case Intrinsic::frameescape: { + case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission + // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission // is the same on all targets. 
for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); @@ -4953,7 +4997,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::getRealLinkageName(MF.getName()), Idx); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, - TII->get(TargetOpcode::FRAME_ALLOC)) + TII->get(TargetOpcode::LOCAL_ESCAPE)) .addSym(FrameAllocSym) .addFrameIndex(FI); } @@ -4961,10 +5005,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } - case Intrinsic::framerecover: { - // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx) + case Intrinsic::localrecover: { + // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); - MVT PtrVT = TLI.getPointerTy(0); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); // Get the symbol that defines the frame offset. auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); @@ -4978,7 +5022,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // that would make this PC relative. SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); SDValue OffsetVal = - DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); + DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); // Add the offset to the FP. Value *FP = I.getArgOperand(1); @@ -4994,7 +5038,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_exceptioncode: { unsigned Reg = TLI.getExceptionPointerRegister(); assert(Reg && "cannot get exception code on this platform"); - MVT PtrVT = TLI.getPointerTy(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad"); unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); @@ -5178,7 +5222,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { - EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); + EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType(), true); if (IsSigned) Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); else @@ -5203,7 +5248,8 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); if (CSize && CSize->getZExtValue() == 0) { - EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); + EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType(), true); setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); return true; } @@ -5640,8 +5686,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!RenameFn) Callee = getValue(I.getCalledValue()); else - Callee = DAG.getExternalSymbol(RenameFn, - DAG.getTargetLoweringInfo().getPointerTy()); + Callee = DAG.getExternalSymbol( + RenameFn, + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. @@ -5670,13 +5717,12 @@ public: /// getCallOperandValEVT - Return the EVT of the Value* that this operand /// corresponds to. 
If there is no Value* for this operand, it returns /// MVT::Other. - EVT getCallOperandValEVT(LLVMContext &Context, - const TargetLowering &TLI, - const DataLayout *DL) const { + EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL) const { if (!CallOperandVal) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) - return TLI.getPointerTy(); + return TLI.getPointerTy(DL); llvm::Type *OpTy = CallOperandVal->getType(); @@ -5698,7 +5744,7 @@ public: // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = DL->getTypeSizeInBits(OpTy); + unsigned BitSize = DL.getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -5712,7 +5758,7 @@ public: } } - return TLI.getValueType(OpTy, true); + return TLI.getValueType(DL, OpTy, true); } }; @@ -5838,8 +5884,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfoVector ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - TargetLowering::AsmOperandInfoVector TargetConstraints = - TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS); + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( + DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); bool hasMemory = false; @@ -5864,10 +5910,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), + STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getSimpleValueType(CS.getType()); + OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); } ++ResNo; break; @@ -5888,8 +5935,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = - OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT(); + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, + DAG.getDataLayout()).getSimpleVT(); } OpInfo.ConstraintVT = OpVT; @@ -5977,17 +6024,19 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const Value *OpVal = OpInfo.CallOperandVal; if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { - OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), - TLI.getPointerTy()); + OpInfo.CallOperand = DAG.getConstantPool( + cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. 
Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); + unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); + SDValue StackSlot = + DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); Chain = DAG.getStore(Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(SSFI), @@ -6022,9 +6071,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // AsmNodeOperands - The operands for the ISD::INLINEASM node. std::vector<SDValue> AsmNodeOperands; AsmNodeOperands.push_back(SDValue()); // reserve space for input chain - AsmNodeOperands.push_back( - DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetExternalSymbol( + IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout()))); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6064,8 +6112,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } } - AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, getCurSDLoc(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetConstant( + ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6201,8 +6249,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); - AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, getCurSDLoc(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetConstant( + OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6227,16 +6275,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); - AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - getCurSDLoc(), - TLI.getPointerTy())); + AsmNodeOperands.push_back(DAG.getTargetConstant( + ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI.getPointerTy() && + assert(InOperandVal.getValueType() == + TLI.getPointerTy(DAG.getDataLayout()) && "Memory operands expect pointer values"); unsigned ConstraintID = @@ -6314,7 +6362,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? 
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -6380,9 +6428,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = *TLI.getDataLayout(); - SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), - getRoot(), getValue(I.getOperand(0)), + const DataLayout &DL = DAG.getDataLayout(); + SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()), + getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), DL.getABITypeAlignment(I.getType())); setValue(&I, V); @@ -6473,8 +6521,8 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back( - Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + Ops.push_back(Builder.DAG.getTargetFrameIndex( + FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } @@ -6654,7 +6702,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 3> ValueVTs; - ComputeValueVTs(TLI, CS->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end @@ -6718,10 +6766,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Type *OrigRetTy = CLI.RetTy; SmallVector<EVT, 4> RetTys; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); + auto &DL = CLI.DAG.getDataLayout(); + ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); bool CanLowerReturn = this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), @@ -6733,13 +6782,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // FIXME: equivalent assert? 
// assert(!CS.hasInAllocaArgument() && // "sret demotion is incompatible with inalloca"); - uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); - unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); + uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); + unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); - DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; @@ -6784,7 +6833,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ArgListTy &Args = CLI.getArgs(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*this, Args[i].Ty, ValueVTs); + ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); Type *FinalType = Args[i].Ty; if (Args[i].isByVal) FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); @@ -6797,7 +6846,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); @@ -6821,14 +6870,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (Args[i].isByVal || Args[i].isInAlloca) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should come from FE. BE will guess if this // info is not there but there are cases it cannot get right. unsigned FrameAlign; if (Args[i].Alignment) FrameAlign = Args[i].Alignment; else - FrameAlign = getByValTypeAlignment(ElementTy); + FrameAlign = getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (Args[i].isNest) @@ -6923,7 +6972,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SmallVector<EVT, 1> PVTs; Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); - ComputeValueVTs(*this, PtrRetTy, PVTs); + ComputeValueVTs(*this, DL, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; @@ -6997,7 +7046,8 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, + V->getType()); SDValue Chain = DAG.getEntryNode(); ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == @@ -7030,13 +7080,14 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); - const DataLayout *DL = TLI->getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. 
SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. @@ -7053,7 +7104,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); unsigned PartBase = 0; Type *FinalType = I->getType(); @@ -7066,7 +7117,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -7090,14 +7141,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast<PointerType>(I->getType()); Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. unsigned FrameAlign; if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); else - FrameAlign = TLI->getByValTypeAlignment(ElementTy); + FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); Flags.setByValAlign(FrameAlign); } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) @@ -7153,7 +7204,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; @@ -7177,7 +7229,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ++I, ++Idx) { SmallVector<SDValue, 4> ArgValues; SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); // If this argument is unused then remember its value. It is used to generate @@ -7324,7 +7376,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // the input for this MBB. SmallVector<EVT, 4> ValueVTs; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - ComputeValueVTs(TLI, PN->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); @@ -7595,7 +7647,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { // FIXME: Using the pointer type doesn't seem ideal. 
- uint64_t BW = DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; return Range <= BW; } @@ -7650,8 +7702,9 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, APInt LowBound; APInt CmpRange; - const int BitWidth = - DAG.getTargetLoweringInfo().getPointerTy().getSizeInBits(); + const int BitWidth = DAG.getTargetLoweringInfo() + .getPointerTy(DAG.getDataLayout()) + .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); if (Low.isNonNegative() && High.slt(BitWidth)) { @@ -7731,7 +7784,7 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT PTy = TLI.getPointerTy(); + EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); if (!TLI.isOperationLegal(ISD::SHL, PTy)) return; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f225d54d189d..700675453fe7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -755,8 +755,6 @@ public: void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); - unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV, - MachineBasicBlock *LPadMBB); private: // These all get lowered before this pass. @@ -915,8 +913,8 @@ struct RegsForValue { RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt); - RegsForValue(LLVMContext &Context, const TargetLowering &tli, unsigned Reg, - Type *Ty); + RegsForValue(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL, unsigned Reg, Type *Ty); /// append - Add the specified values to this one. void append(const RegsForValue &RHS) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index ef468a2b1c54..5b9b18286fae 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -95,7 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; - case ISD::FRAME_ALLOC_RECOVER: return "FRAME_ALLOC_RECOVER"; + case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; case ISD::READ_REGISTER: return "READ_REGISTER"; case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 31f8210f40f0..97ece8b9248a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -921,7 +921,8 @@ void SelectionDAGISel::DoInstructionSelection() { bool SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; - const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + const TargetRegisterClass *PtrRC = + TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. 
@@ -1931,7 +1932,8 @@ SDNode MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); unsigned Reg = - TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0)); + TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), + *CurDAG); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); @@ -1944,7 +1946,8 @@ SDNode MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), - Op->getOperand(2).getValueType()); + Op->getOperand(2).getValueType(), + *CurDAG); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); @@ -2329,21 +2332,23 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool -CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering *TLI) { +CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, + const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (N.getValueType() == VT) return true; // Handle the case when VT is iPTR. - return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(); + return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering *TLI, unsigned ChildNo) { + SDValue N, const TargetLowering *TLI, const DataLayout &DL, + unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) return false; // Match fails if out of range child #. - return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI); + return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI, + DL); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2355,13 +2360,13 @@ CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering *TLI) { + SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (cast<VTSDNode>(N)->getVT() == VT) return true; // Handle the case when VT is iPTR. 
- return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(); + return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2444,7 +2449,8 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Result = !::CheckOpcode(Table, Index, N.getNode()); return Index; case SelectionDAGISel::OPC_CheckType: - Result = !::CheckType(Table, Index, N, SDISel.TLI); + Result = !::CheckType(Table, Index, N, SDISel.TLI, + SDISel.CurDAG->getDataLayout()); return Index; case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: @@ -2454,15 +2460,16 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckChild5Type: case SelectionDAGISel::OPC_CheckChild6Type: case SelectionDAGISel::OPC_CheckChild7Type: - Result = !::CheckChildType(Table, Index, N, SDISel.TLI, - Table[Index - 1] - - SelectionDAGISel::OPC_CheckChild0Type); + Result = !::CheckChildType( + Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(), + Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type); return Index; case SelectionDAGISel::OPC_CheckCondCode: Result = !::CheckCondCode(Table, Index, N); return Index; case SelectionDAGISel::OPC_CheckValueType: - Result = !::CheckValueType(Table, Index, N, SDISel.TLI); + Result = !::CheckValueType(Table, Index, N, SDISel.TLI, + SDISel.CurDAG->getDataLayout()); return Index; case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); @@ -2816,7 +2823,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; case OPC_CheckType: - if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) + if (!::CheckType(MatcherTable, MatcherIndex, N, TLI, + CurDAG->getDataLayout())) break; continue; @@ -2864,7 +2872,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = TLI->getPointerTy(); + CaseVT = TLI->getPointerTy(CurDAG->getDataLayout()); // If the VT matches, then we will execute this case. 
if (CurNodeVT == CaseVT) @@ -2887,14 +2895,16 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckChild4Type: case OPC_CheckChild5Type: case OPC_CheckChild6Type: case OPC_CheckChild7Type: if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, - Opcode-OPC_CheckChild0Type)) + CurDAG->getDataLayout(), + Opcode - OPC_CheckChild0Type)) break; continue; case OPC_CheckCondCode: if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckValueType: - if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) + if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI, + CurDAG->getDataLayout())) break; continue; case OPC_CheckInteger: @@ -3097,7 +3107,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (VT == MVT::iPTR) - VT = TLI->getPointerTy().SimpleTy; + VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy; VTs.push_back(VT); } diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index bd40cac95543..34688df4765b 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -337,9 +337,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, // TODO: To eliminate this problem we can remove gc.result intrinsics // completelly and make statepoint call to return a tuple. unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); - RegsForValue RFV(*Builder.DAG.getContext(), - Builder.DAG.getTargetLoweringInfo(), Reg, - ISP.getActualReturnType()); + RegsForValue RFV( + *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), + Builder.DAG.getDataLayout(), Reg, ISP.getActualReturnType()); SDValue Chain = Builder.DAG.getEntryNode(); RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain, diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e7722b392a81..fbf651277c7f 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -102,7 +102,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, } if (LC == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported library call operation!"); - SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy(DAG.getDataLayout())); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); @@ -206,14 +207,16 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode(ISD::SETCC, dl, - getSetCCResultType(*DAG.getContext(), RetVT), - NewLHS, NewRHS, DAG.getCondCode(CCCode)); + SDValue Tmp = DAG.getNode( + ISD::SETCC, dl, + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), + NewLHS, NewRHS, DAG.getCondCode(CCCode)); NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl).first; - NewLHS = DAG.getNode(ISD::SETCC, dl, - getSetCCResultType(*DAG.getContext(), RetVT), NewLHS, - NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); + NewLHS = DAG.getNode( + ISD::SETCC, dl, + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), + NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); NewLHS = 
DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); NewRHS = SDValue(); } @@ -242,7 +245,7 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) - return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(0)); + return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout())); return Table; } @@ -265,9 +268,7 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // In dynamic-no-pic mode, assume that known defined values are safe. if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC && - GA && - !GA->getGlobal()->isDeclaration() && - !GA->getGlobal()->isWeakForLinker()) + GA && GA->getGlobal()->isStrongDefinitionForLinker()) return true; // Otherwise assume nothing is safe. @@ -383,6 +384,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, "Mask size mismatches value type size!"); APInt NewMask = DemandedMask; SDLoc dl(Op); + auto &DL = TLO.DAG.getDataLayout(); // Don't know anything. KnownZero = KnownOne = APInt(BitWidth, 0); @@ -645,7 +647,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, unsigned InnerBits = InnerVT.getSizeInBits(); if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && isTypeDesirableForOp(ISD::SHL, InnerVT)) { - EVT ShTy = getShiftAmountTy(InnerVT); + EVT ShTy = getShiftAmountTy(InnerVT, DL); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) ShTy = InnerVT; SDValue NarrowShl = @@ -824,7 +826,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // for scalar types after legalization. EVT ShiftAmtTy = Op.getValueType(); if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) - ShiftAmtTy = getShiftAmountTy(ShiftAmtTy); + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl, ShiftAmtTy); @@ -1009,8 +1011,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue Shift = In.getOperand(1); if (TLO.LegalTypes()) { uint64_t ShVal = ShAmt->getZExtValue(); - Shift = - TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(Op.getValueType())); + Shift = TLO.DAG.getConstant(ShVal, dl, + getShiftAmountTy(Op.getValueType(), DL)); } APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, @@ -1400,7 +1402,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { if ((newMask & Mask) == Mask) { - if (!getDataLayout()->isLittleEndian()) + if (!DAG.getDataLayout().isLittleEndian()) bestOffset = (origWidth/width - offset - 1) * (width/8); else bestOffset = (uint64_t)offset * (width/8); @@ -1473,7 +1475,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { - EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT); + EVT NewSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), @@ -1692,11 +1695,13 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && (VT == N0.getValueType() || (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && - N0.getOpcode() == ISD::AND) + N0.getOpcode() == ISD::AND) { + auto &DL = DAG.getDataLayout(); if (ConstantSDNode 
*AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(N0.getValueType()); + EVT ShiftTy = DCI.isBeforeLegalize() + ? getPointerTy(DL) + : getShiftAmountTy(N0.getValueType(), DL); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. if (AndRHS->getAPIntValue().isPowerOf2()) { @@ -1716,6 +1721,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } } + } if (C1.getMinSignedBits() <= 64 && !isLegalICmpImmediate(C1.getSExtValue())) { @@ -1727,8 +1733,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - EVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(N0.getValueType()); + auto &DL = DAG.getDataLayout(); + EVT ShiftTy = DCI.isBeforeLegalize() + ? getPointerTy(DL) + : getShiftAmountTy(N0.getValueType(), DL); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), DAG.getConstant(ShiftBits, dl, @@ -1757,8 +1765,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, NewC = NewC.lshr(ShiftBits); if (ShiftBits && NewC.getMinSignedBits() <= 64 && isLegalICmpImmediate(NewC.getSExtValue())) { - EVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(N0.getValueType()); + auto &DL = DAG.getDataLayout(); + EVT ShiftTy = DCI.isBeforeLegalize() + ? getPointerTy(DL) + : getShiftAmountTy(N0.getValueType(), DL); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, DAG.getConstant(ShiftBits, dl, ShiftTy)); @@ -1945,10 +1955,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond); if (N0.getNode()->hasOneUse()) { assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + auto &DL = DAG.getDataLayout(); // (Z-X) == X --> Z == X<<1 - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, - DAG.getConstant(1, dl, - getShiftAmountTy(N1.getValueType()))); + SDValue SH = DAG.getNode( + ISD::SHL, dl, N1.getValueType(), N1, + DAG.getConstant(1, dl, + getShiftAmountTy(N1.getValueType(), DL))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -1969,10 +1981,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(0, dl, N1.getValueType()), Cond); if (N1.getNode()->hasOneUse()) { assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); + auto &DL = DAG.getDataLayout(); // X == (Z-X) --> X<<1 == Z - SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, dl, - getShiftAmountTy(N0.getValueType()))); + SDValue SH = DAG.getNode( + ISD::SHL, dl, N1.getValueType(), N0, + DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -2105,9 +2118,8 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { // Inline Assembler Implementation Methods //===----------------------------------------------------------------------===// - TargetLowering::ConstraintType -TargetLowering::getConstraintType(const std::string &Constraint) const { +TargetLowering::getConstraintType(StringRef Constraint) const { unsigned S = Constraint.size(); if (S == 1) { @@ -2140,7 +2152,7 @@ 
TargetLowering::getConstraintType(const std::string &Constraint) const { } if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') { - if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}" + if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}" return C_Memory; return C_Register; } @@ -2206,8 +2218,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), C ? SDLoc(C) : SDLoc(), Op.getValueType(), Offs)); - return; } + return; } if (C) { // just C, no GV. // Simple constants are not allowed for 's'. @@ -2217,8 +2229,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, // ScheduleDAGSDNodes::EmitNode, which is very generic. Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), SDLoc(C), MVT::i64)); - return; } + return; } break; } @@ -2227,7 +2239,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass *> TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.empty() || Constraint[0] != '{') return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr)); @@ -2293,7 +2305,8 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. TargetLowering::AsmOperandInfoVector -TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, +TargetLowering::ParseConstraints(const DataLayout &DL, + const TargetRegisterInfo *TRI, ImmutableCallSite CS) const { /// ConstraintOperands - Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; @@ -2329,10 +2342,11 @@ TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpInfo.ConstraintVT = getSimpleValueType(STy->getElementType(ResNo)); + OpInfo.ConstraintVT = + getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = getSimpleValueType(CS.getType()); + OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType()); } ++ResNo; break; @@ -2361,7 +2375,7 @@ TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy); + unsigned BitSize = DL.getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -2375,8 +2389,7 @@ TargetLowering::ParseConstraints(const TargetRegisterInfo *TRI, break; } } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { - unsigned PtrSize - = getDataLayout()->getPointerSizeInBits(PT->getAddressSpace()); + unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace()); OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize); } else { OpInfo.ConstraintVT = MVT::getVT(OpTy, true); @@ -2684,7 +2697,8 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, if (ShAmt) { // TODO: For UDIV use SRL instead of SRA. 
SDValue Amt = - DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType())); + DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(), + DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags); @@ -2750,17 +2764,19 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); Created->push_back(Q.getNode()); } + auto &DL = DAG.getDataLayout(); // Shift right algebraic if shift value is nonzero if (magics.s > 0) { - Q = DAG.getNode(ISD::SRA, dl, VT, Q, - DAG.getConstant(magics.s, dl, - getShiftAmountTy(Q.getValueType()))); + Q = DAG.getNode( + ISD::SRA, dl, VT, Q, + DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient - SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, - getShiftAmountTy(Q.getValueType()))); + SDValue T = + DAG.getNode(ISD::SRL, dl, VT, Q, + DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, + getShiftAmountTy(Q.getValueType(), DL))); Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); } @@ -2776,6 +2792,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, EVT VT = N->getValueType(0); SDLoc dl(N); + auto &DL = DAG.getDataLayout(); // Check to see if we can do this. // FIXME: We should be more aggressive here. @@ -2792,9 +2809,9 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, // the divided value upfront. if (magics.a != 0 && !Divisor[0]) { unsigned Shift = Divisor.countTrailingZeros(); - Q = DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(Shift, dl, - getShiftAmountTy(Q.getValueType()))); + Q = DAG.getNode( + ISD::SRL, dl, VT, Q, + DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL))); Created->push_back(Q.getNode()); // Get magic number for the shifted divisor. 
@@ -2819,21 +2836,22 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, if (magics.a == 0) { assert(magics.s < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); - return DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(magics.s, dl, - getShiftAmountTy(Q.getValueType()))); + return DAG.getNode( + ISD::SRL, dl, VT, Q, + DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); Created->push_back(NPQ.getNode()); - NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(1, dl, - getShiftAmountTy(NPQ.getValueType()))); + NPQ = DAG.getNode( + ISD::SRL, dl, VT, NPQ, + DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL))); Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); Created->push_back(NPQ.getNode()); - return DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(magics.s - 1, dl, - getShiftAmountTy(NPQ.getValueType()))); + return DAG.getNode( + ISD::SRL, dl, VT, NPQ, + DAG.getConstant(magics.s - 1, dl, + getShiftAmountTy(NPQ.getValueType(), DL))); } } @@ -2919,8 +2937,9 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, if (!LH.getNode() && !RH.getNode() && isOperationLegalOrCustom(ISD::SRL, VT) && isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + auto &DL = DAG.getDataLayout(); unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); - SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT)); + SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL)); LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); @@ -2980,14 +2999,15 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); - SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), - DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT))); + auto &DL = DAG.getDataLayout(); + SDValue ExponentBits = DAG.getNode( + ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL))); SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); - SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, - DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), - DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT))); + SDValue Sign = DAG.getNode( + ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL))); Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); SDValue R = DAG.getNode(ISD::OR, dl, IntVT, @@ -2996,17 +3016,17 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, R = DAG.getZExtOrTrunc(R, dl, NVT); - - R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, - DAG.getNode(ISD::SHL, dl, NVT, R, - DAG.getZExtOrTrunc( - DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), - dl, getShiftAmountTy(IntVT))), - DAG.getNode(ISD::SRL, dl, NVT, R, - DAG.getZExtOrTrunc( - DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), - dl, getShiftAmountTy(IntVT))), - ISD::SETGT); + R = DAG.getSelectCC( + dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, 
getShiftAmountTy(IntVT, DL))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, getShiftAmountTy(IntVT, DL))), + ISD::SETGT); SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, DAG.getNode(ISD::XOR, dl, NVT, R, Sign), diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index 0e89bad5f26f..00db94256844 100644 --- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -15,9 +15,5 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; -TargetSelectionDAGInfo::TargetSelectionDAGInfo(const DataLayout *DL) - : DL(DL) { -} - TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { } diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 116fd5be0337..d236e1f5ab6f 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -45,7 +45,6 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { class SjLjEHPrepare : public FunctionPass { - const TargetMachine *TM; Type *doubleUnderDataTy; Type *doubleUnderJBufTy; Type *FunctionContextTy; @@ -63,7 +62,7 @@ class SjLjEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {} + explicit SjLjEHPrepare() : FunctionPass(ID) {} bool doInitialization(Module &M) override; bool runOnFunction(Function &F) override; @@ -83,11 +82,11 @@ private: } // end anonymous namespace char SjLjEHPrepare::ID = 0; +INITIALIZE_PASS(SjLjEHPrepare, "sjljehprepare", "Prepare SjLj exceptions", + false, false) // Public Interface To the SjLjEHPrepare pass. -FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) { - return new SjLjEHPrepare(TM); -} +FunctionPass *llvm::createSjLjEHPreparePass() { return new SjLjEHPrepare(); } // doInitialization - Set up decalarations and types needed to process // exceptions. bool SjLjEHPrepare::doInitialization(Module &M) { @@ -196,9 +195,8 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. - const TargetLowering *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); - unsigned Align = - TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); + auto &DL = F.getParent()->getDataLayout(); + unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", EntryBB->begin()); diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index d88be575d56c..855058358fe4 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -49,7 +49,6 @@ namespace { /// information provided by this pass is optional and not required by the /// aformentioned intrinsic to function. class StackMapLiveness : public MachineFunctionPass { - MachineFunction *MF; const TargetRegisterInfo *TRI; LivePhysRegs LiveRegs; @@ -68,14 +67,14 @@ public: private: /// \brief Performs the actual liveness calculation for the function. - bool calculateLiveness(); + bool calculateLiveness(MachineFunction &MF); /// \brief Add the current register live set to the instruction. 
- void addLiveOutSetToMI(MachineInstr &MI); + void addLiveOutSetToMI(MachineFunction &MF, MachineInstr &MI); /// \brief Create a register mask and initialize it with the registers from /// the register live set. - uint32_t *createRegisterMask() const; + uint32_t *createRegisterMask(MachineFunction &MF) const; }; } // namespace @@ -95,8 +94,7 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const { // We preserve all information. AU.setPreservesAll(); AU.setPreservesCFG(); - // Default dependencie for all MachineFunction passes. - AU.addRequired<MachineFunctionAnalysis>(); + MachineFunctionPass::getAnalysisUsage(AU); } /// Calculate the liveness information for the given machine function. @@ -106,7 +104,6 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName() << " **********\n"); - this->MF = &MF; TRI = MF.getSubtarget().getRegisterInfo(); ++NumStackMapFuncVisited; @@ -115,25 +112,23 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { ++NumStackMapFuncSkipped; return false; } - return calculateLiveness(); + return calculateLiveness(MF); } /// Performs the actual liveness calculation for the function. -bool StackMapLiveness::calculateLiveness() { +bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { bool HasChanged = false; // For all basic blocks in the function. - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - DEBUG(dbgs() << "****** BB " << MBBI->getName() << " ******\n"); + for (auto &MBB : MF) { + DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n"); LiveRegs.init(TRI); - LiveRegs.addLiveOuts(MBBI); + LiveRegs.addLiveOuts(&MBB); bool HasStackMap = false; // Reverse iterate over all instructions and add the current live register // set to an instruction if we encounter a patchpoint instruction. - for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(), - E = MBBI->rend(); I != E; ++I) { + for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) { if (I->getOpcode() == TargetOpcode::PATCHPOINT) { - addLiveOutSetToMI(*I); + addLiveOutSetToMI(MF, *I); HasChanged = true; HasStackMap = true; ++NumStackMaps; @@ -149,21 +144,23 @@ bool StackMapLiveness::calculateLiveness() { } /// Add the current register live set to the instruction. -void StackMapLiveness::addLiveOutSetToMI(MachineInstr &MI) { - uint32_t *Mask = createRegisterMask(); +void StackMapLiveness::addLiveOutSetToMI(MachineFunction &MF, + MachineInstr &MI) { + uint32_t *Mask = createRegisterMask(MF); MachineOperand MO = MachineOperand::CreateRegLiveOut(Mask); - MI.addOperand(*MF, MO); + MI.addOperand(MF, MO); } /// Create a register mask and initialize it with the registers from the /// register live set. -uint32_t *StackMapLiveness::createRegisterMask() const { +uint32_t *StackMapLiveness::createRegisterMask(MachineFunction &MF) const { // The mask is owned and cleaned up by the Machine Function. - uint32_t *Mask = MF->allocateRegisterMask(TRI->getNumRegs()); - for (LivePhysRegs::const_iterator RI = LiveRegs.begin(), RE = LiveRegs.end(); - RI != RE; ++RI) - Mask[*RI / 32] |= 1U << (*RI % 32); + uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); + for (auto Reg : LiveRegs) + Mask[Reg / 32] |= 1U << (Reg % 32); + // Give the target a chance to adjust the mask. 
TRI->adjustStackMapLiveOutMask(Mask); + return Mask; } diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 1e8e03f9a7df..116eef66c580 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -29,17 +29,17 @@ using namespace llvm; #define DEBUG_TYPE "stackmaps" -static cl::opt<int> StackMapVersion("stackmap-version", cl::init(1), - cl::desc("Specify the stackmap encoding version (default = 1)")); +static cl::opt<int> StackMapVersion( + "stackmap-version", cl::init(1), + cl::desc("Specify the stackmap encoding version (default = 1)")); const char *StackMaps::WSMP = "Stack Maps: "; PatchPointOpers::PatchPointOpers(const MachineInstr *MI) - : MI(MI), - HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - !MI->getOperand(0).isImplicit()), - IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) -{ + : MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + !MI->getOperand(0).isImplicit()), + IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == + CallingConv::AnyReg) { #ifndef NDEBUG unsigned CheckStartIdx = 0, e = MI->getNumOperands(); while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() && @@ -76,30 +76,31 @@ StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { /// Go up the super-register chain until we hit a valid dwarf register number. static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { - int RegNo = TRI->getDwarfRegNum(Reg, false); - for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNo < 0; ++SR) - RegNo = TRI->getDwarfRegNum(*SR, false); + int RegNum = TRI->getDwarfRegNum(Reg, false); + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNum < 0; ++SR) + RegNum = TRI->getDwarfRegNum(*SR, false); - assert(RegNo >= 0 && "Invalid Dwarf register number."); - return (unsigned) RegNo; + assert(RegNum >= 0 && "Invalid Dwarf register number."); + return (unsigned)RegNum; } MachineInstr::const_mop_iterator StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, - MachineInstr::const_mop_iterator MOE, - LocationVec &Locs, LiveOutVec &LiveOuts) const { + MachineInstr::const_mop_iterator MOE, LocationVec &Locs, + LiveOutVec &LiveOuts) const { const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); if (MOI->isImm()) { switch (MOI->getImm()) { - default: llvm_unreachable("Unrecognized operand type."); + default: + llvm_unreachable("Unrecognized operand type."); case StackMaps::DirectMemRefOp: { unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Direct, Size, - getDwarfRegNum(Reg, TRI), Imm)); + Locs.emplace_back(StackMaps::Location::Direct, Size, + getDwarfRegNum(Reg, TRI), Imm); break; } case StackMaps::IndirectMemRefOp: { @@ -107,15 +108,15 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(Size > 0 && "Need a valid size for indirect memory locations."); unsigned Reg = (++MOI)->getReg(); int64_t Imm = (++MOI)->getImm(); - Locs.push_back(Location(StackMaps::Location::Indirect, Size, - getDwarfRegNum(Reg, TRI), Imm)); + Locs.emplace_back(StackMaps::Location::Indirect, Size, + getDwarfRegNum(Reg, TRI), Imm); break; } case StackMaps::ConstantOp: { ++MOI; assert(MOI->isImm() && "Expected constant operand."); int64_t Imm = MOI->getImm(); - Locs.push_back(Location(Location::Constant, sizeof(int64_t), 0, Imm)); + 
Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, Imm); break; } } @@ -137,14 +138,13 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, assert(!MOI->getSubReg() && "Physical subreg still around."); unsigned Offset = 0; - unsigned RegNo = getDwarfRegNum(MOI->getReg(), TRI); - unsigned LLVMRegNo = TRI->getLLVMRegNum(RegNo, false); - unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNo, MOI->getReg()); + unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI); + unsigned LLVMRegNum = TRI->getLLVMRegNum(DwarfRegNum, false); + unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg()); if (SubRegIdx) Offset = TRI->getSubRegIdxOffset(SubRegIdx); - Locs.push_back( - Location(Location::Register, RC->getSize(), RegNo, Offset)); + Locs.emplace_back(Location::Register, RC->getSize(), DwarfRegNum, Offset); return ++MOI; } @@ -165,19 +165,19 @@ void StackMaps::print(raw_ostream &OS) { OS << WSMP << "callsite " << CSI.ID << "\n"; OS << WSMP << " has " << CSLocs.size() << " locations\n"; - unsigned OperIdx = 0; + unsigned Idx = 0; for (const auto &Loc : CSLocs) { - OS << WSMP << " Loc " << OperIdx << ": "; - switch (Loc.LocType) { + OS << WSMP << "\t\tLoc " << Idx << ": "; + switch (Loc.Type) { case Location::Unprocessed: OS << "<Unprocessed operand>"; break; case Location::Register: OS << "Register "; - if (TRI) - OS << TRI->getName(Loc.Reg); - else - OS << Loc.Reg; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; break; case Location::Direct: OS << "Direct "; @@ -203,23 +203,23 @@ void StackMaps::print(raw_ostream &OS) { OS << "Constant Index " << Loc.Offset; break; } - OS << " [encoding: .byte " << Loc.LocType << ", .byte " << Loc.Size + OS << "\t[encoding: .byte " << Loc.Type << ", .byte " << Loc.Size << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n"; - OperIdx++; + Idx++; } - OS << WSMP << " has " << LiveOuts.size() << " live-out registers\n"; + OS << WSMP << "\thas " << LiveOuts.size() << " live-out registers\n"; - OperIdx = 0; + Idx = 0; for (const auto &LO : LiveOuts) { - OS << WSMP << " LO " << OperIdx << ": "; + OS << WSMP << "\t\tLO " << Idx << ": "; if (TRI) OS << TRI->getName(LO.Reg); else OS << LO.Reg; - OS << " [encoding: .short " << LO.RegNo << ", .byte 0, .byte " + OS << "\t[encoding: .short " << LO.DwarfRegNum << ", .byte 0, .byte " << LO.Size << "]\n"; - OperIdx++; + Idx++; } } } @@ -227,9 +227,9 @@ void StackMaps::print(raw_ostream &OS) { /// Create a live-out register record for the given register Reg. StackMaps::LiveOutReg StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { - unsigned RegNo = getDwarfRegNum(Reg, TRI); + unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI); unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - return LiveOutReg(Reg, RegNo, Size); + return LiveOutReg(Reg, DwarfRegNum, Size); } /// Parse the register live-out mask and return a vector of live-out registers @@ -248,11 +248,16 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { // We don't need to keep track of a register if its super-register is already // in the list. Merge entries that refer to the same dwarf register and use // the maximum size that needs to be spilled. 
- std::sort(LiveOuts.begin(), LiveOuts.end()); - for (LiveOutVec::iterator I = LiveOuts.begin(), E = LiveOuts.end(); - I != E; ++I) { - for (LiveOutVec::iterator II = std::next(I); II != E; ++II) { - if (I->RegNo != II->RegNo) { + + std::sort(LiveOuts.begin(), LiveOuts.end(), + [](const LiveOutReg &LHS, const LiveOutReg &RHS) { + // Only sort by the dwarf register number. + return LHS.DwarfRegNum < RHS.DwarfRegNum; + }); + + for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) { + for (auto II = std::next(I); II != E; ++II) { + if (I->DwarfRegNum != II->DwarfRegNum) { // Skip all the now invalid entries. I = --II; break; @@ -260,11 +265,15 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { I->Size = std::max(I->Size, II->Size); if (TRI->isSuperRegister(I->Reg, II->Reg)) I->Reg = II->Reg; - II->MarkInvalid(); + II->Reg = 0; // mark for deletion. } } - LiveOuts.erase(std::remove_if(LiveOuts.begin(), LiveOuts.end(), - LiveOutReg::IsInvalid), LiveOuts.end()); + + LiveOuts.erase( + std::remove_if(LiveOuts.begin(), LiveOuts.end(), + [](const LiveOutReg &LO) { return LO.Reg == 0; }), + LiveOuts.end()); + return LiveOuts; } @@ -282,8 +291,8 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, if (recordResult) { assert(PatchPointOpers(&MI).hasDef() && "Stackmap has no return value."); - parseOperand(MI.operands_begin(), std::next(MI.operands_begin()), - Locations, LiveOuts); + parseOperand(MI.operands_begin(), std::next(MI.operands_begin()), Locations, + LiveOuts); } // Parse operands. @@ -292,33 +301,31 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, } // Move large constants into the constant pool. - for (LocationVec::iterator I = Locations.begin(), E = Locations.end(); - I != E; ++I) { + for (auto &Loc : Locations) { // Constants are encoded as sign-extended integers. // -1 is directly encoded as .long 0xFFFFFFFF with no constant pool. - if (I->LocType == Location::Constant && !isInt<32>(I->Offset)) { - I->LocType = Location::ConstantIndex; + if (Loc.Type == Location::Constant && !isInt<32>(Loc.Offset)) { + Loc.Type = Location::ConstantIndex; // ConstPool is intentionally a MapVector of 'uint64_t's (as // opposed to 'int64_t's). We should never be in a situation // where we have to insert either the tombstone or the empty // keys into a map, and for a DenseMap<uint64_t, T> these are // (uint64_t)0 and (uint64_t)-1. They can be and are // represented using 32 bit integers. - - assert((uint64_t)I->Offset != DenseMapInfo<uint64_t>::getEmptyKey() && - (uint64_t)I->Offset != DenseMapInfo<uint64_t>::getTombstoneKey() && + assert((uint64_t)Loc.Offset != DenseMapInfo<uint64_t>::getEmptyKey() && + (uint64_t)Loc.Offset != + DenseMapInfo<uint64_t>::getTombstoneKey() && "empty and tombstone keys should fit in 32 bits!"); - auto Result = ConstPool.insert(std::make_pair(I->Offset, I->Offset)); - I->Offset = Result.first - ConstPool.begin(); + auto Result = ConstPool.insert(std::make_pair(Loc.Offset, Loc.Offset)); + Loc.Offset = Result.first - ConstPool.begin(); } } // Create an expression to calculate the offset of the callsite from function // entry. 
const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(MILabel, OutContext), - MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), - OutContext); + MCSymbolRefExpr::create(MILabel, OutContext), + MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext); CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), std::move(LiveOuts)); @@ -326,10 +333,10 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, // Record the stack size of the current function. const MachineFrameInfo *MFI = AP.MF->getFrameInfo(); const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo(); - const bool DynamicFrameSize = MFI->hasVarSizedObjects() || - RegInfo->needsStackRealignment(*(AP.MF)); + bool HasDynamicFrameSize = + MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF)); FnStackSize[AP.CurrentFnSym] = - DynamicFrameSize ? UINT64_MAX : MFI->getStackSize(); + HasDynamicFrameSize ? UINT64_MAX : MFI->getStackSize(); } void StackMaps::recordStackMap(const MachineInstr &MI) { @@ -346,25 +353,23 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { PatchPointOpers opers(&MI); int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm(); - MachineInstr::const_mop_iterator MOI = - std::next(MI.operands_begin(), opers.getStackMapStartIdx()); + auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx()); recordStackMapOpers(MI, ID, MOI, MI.operands_end(), opers.isAnyReg() && opers.hasDef()); #ifndef NDEBUG // verify anyregcc - LocationVec &Locations = CSInfos.back().Locations; + auto &Locations = CSInfos.back().Locations; if (opers.isAnyReg()) { unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm(); - for (unsigned i = 0, e = (opers.hasDef() ? NArgs+1 : NArgs); i != e; ++i) - assert(Locations[i].LocType == Location::Register && + for (unsigned i = 0, e = (opers.hasDef() ? NArgs + 1 : NArgs); i != e; ++i) + assert(Locations[i].Type == Location::Register && "anyreg arg must be in reg."); } #endif } void StackMaps::recordStatepoint(const MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::STATEPOINT && - "expected statepoint"); + assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint"); StatepointOpers opers(&MI); // Record all the deopt and gc operands (they're contiguous and run from the @@ -387,8 +392,8 @@ void StackMaps::recordStatepoint(const MachineInstr &MI) { void StackMaps::emitStackmapHeader(MCStreamer &OS) { // Header. OS.EmitIntValue(StackMapVersion, 1); // Version. - OS.EmitIntValue(0, 1); // Reserved. - OS.EmitIntValue(0, 2); // Reserved. + OS.EmitIntValue(0, 1); // Reserved. + OS.EmitIntValue(0, 2); // Reserved. // Num functions. DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n'); @@ -412,7 +417,7 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { DEBUG(dbgs() << WSMP << "functions:\n"); for (auto const &FR : FnStackSize) { DEBUG(dbgs() << WSMP << "function addr: " << FR.first - << " frame size: " << FR.second); + << " frame size: " << FR.second); OS.EmitSymbolValue(FR.first, 8); OS.EmitIntValue(FR.second, 8); } @@ -424,7 +429,7 @@ void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { // Constant pool entries. 
DEBUG(dbgs() << WSMP << "constants:\n"); - for (auto ConstEntry : ConstPool) { + for (const auto &ConstEntry : ConstPool) { DEBUG(dbgs() << WSMP << ConstEntry.second << '\n'); OS.EmitIntValue(ConstEntry.second, 8); } @@ -489,7 +494,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { OS.EmitIntValue(CSLocs.size(), 2); for (const auto &Loc : CSLocs) { - OS.EmitIntValue(Loc.LocType, 1); + OS.EmitIntValue(Loc.Type, 1); OS.EmitIntValue(Loc.Size, 1); OS.EmitIntValue(Loc.Reg, 2); OS.EmitIntValue(Loc.Offset, 4); @@ -500,7 +505,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { OS.EmitIntValue(LiveOuts.size(), 2); for (const auto &LO : LiveOuts) { - OS.EmitIntValue(LO.RegNo, 2); + OS.EmitIntValue(LO.DwarfRegNum, 2); OS.EmitIntValue(0, 1); OS.EmitIntValue(LO.Size, 1); } @@ -511,7 +516,7 @@ void StackMaps::emitCallsiteEntries(MCStreamer &OS) { /// Serialize the stackmap data. void StackMaps::serializeToStackMapSection() { - (void) WSMP; + (void)WSMP; // Bail out if there's no stack map data. assert((!CSInfos.empty() || (CSInfos.empty() && ConstPool.empty())) && "Expected empty constant pool too!"); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 0824d6f91db0..bcea37a3aafa 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -122,7 +122,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, // If an array has more than SSPBufferSize bytes of allocated space, then we // emit stack protectors. - if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) { + if (SSPBufferSize <= M->getDataLayout().getTypeAllocSize(AT)) { IsLarge = true; return true; } diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index 56383247eadb..f3cccd82a5c5 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -11,9 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -54,3 +57,30 @@ bool TargetFrameLowering::needsFrameIndexResolution( const MachineFunction &MF) const { return MF.getFrameInfo()->hasStackObjects(); } + +void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + // Get the callee saved register list... + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); + + // Early exit if there are no callee saved registers. + if (!CSRegs || CSRegs[0] == 0) + return; + + SavedRegs.resize(TRI.getNumRegs()); + + // In Naked functions we aren't going to save any registers. + if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + return; + + // Functions which call __builtin_unwind_init get all their registers saved. 
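determineCalleeSaves is introduced here as a shared base implementation: it sizes the BitVector to the number of target registers and, in the loop below, sets a bit for every callee-saved register that is modified (or for all of them when the function calls __builtin_unwind_init). A hedged sketch of how a target would typically layer on the new hook; determineCalleeSavesForTarget and ExtraReg are illustrative names, not code from the patch:

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"

using namespace llvm;

// Run the shared base logic, then pin any extra register the target wants
// saved (for example a frame pointer when one is required).
static void determineCalleeSavesForTarget(const TargetFrameLowering &TFL,
                                          MachineFunction &MF,
                                          BitVector &SavedRegs,
                                          RegScavenger *RS, unsigned ExtraReg) {
  TFL.determineCalleeSaves(MF, SavedRegs, RS); // shared base implementation
  if (TFL.hasFP(MF))
    SavedRegs.set(ExtraReg); // ExtraReg is a placeholder register number
}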
+ bool CallsUnwindInit = MF.getMMI().callsUnwindInit(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (CallsUnwindInit || MRI.isPhysRegModified(Reg)) + SavedRegs.set(Reg); + } +} diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 78492a6e8818..ecfd65931574 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -750,7 +750,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { initActions(); // Perform these initializations only once. - IsLittleEndian = getDataLayout()->isLittleEndian(); MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; @@ -879,28 +878,17 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); } -MVT TargetLoweringBase::getPointerTy(uint32_t AS) const { - return MVT::getIntegerVT(getPointerSizeInBits(AS)); +MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, + EVT) const { + return MVT::getIntegerVT(8 * DL.getPointerSize(0)); } -unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const { - return getDataLayout()->getPointerSizeInBits(AS); -} - -unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const { - assert(Ty->isPointerTy()); - return getPointerSizeInBits(Ty->getPointerAddressSpace()); -} - -MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8 * getDataLayout()->getPointerSize(0)); -} - -EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { +EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, + const DataLayout &DL) const { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) return LHSTy; - return getScalarShiftAmountTy(LHSTy); + return getScalarShiftAmountTy(DL, LHSTy); } /// canOpTrap - Returns true if the operation can trap for the value type. @@ -1398,9 +1386,10 @@ void TargetLoweringBase::computeRegisterProperties( } } -EVT TargetLoweringBase::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const { assert(!VT.isVector() && "No default SetCC type for vectors!"); - return getPointerTy(0).SimpleTy; + return getPointerTy(DL).SimpleTy; } MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { @@ -1485,11 +1474,11 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. /// TODO: Move this out of TargetLowering.cpp. -void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, +void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr, SmallVectorImpl<ISD::OutputArg> &Outs, - const TargetLowering &TLI) { + const TargetLowering &TLI, const DataLayout &DL) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, ReturnType, ValueVTs); + ComputeValueVTs(TLI, DL, ReturnType, ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -1534,8 +1523,9 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. 
-unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const { - return getDataLayout()->getABITypeAlignment(Ty); +unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const { + return DL.getABITypeAlignment(Ty); } //===----------------------------------------------------------------------===// @@ -1614,9 +1604,10 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { } std::pair<unsigned, MVT> -TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const { +TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, + Type *Ty) const { LLVMContext &C = Ty->getContext(); - EVT MTy = getValueType(Ty); + EVT MTy = getValueType(DL, Ty); unsigned Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that @@ -1642,8 +1633,8 @@ TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // The default implementation of this implements a conservative RISCy, r+r and // r+i addr mode. diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index e84bea63995e..1e30821dc741 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1215,11 +1215,11 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // addl %esi, %edi // movl %edi, %eax // ret - bool commuted = false; + bool Commuted = false; // If it's profitable to commute, try to do so. if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { - commuted = true; + Commuted = true; ++NumCommuted; if (AggressiveCommute) ++NumAggrCommuted; @@ -1232,7 +1232,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. - if (!commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { + if (!Commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { ++NumReSchedDowns; return true; } @@ -1250,7 +1250,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, } // Return if it is commuted but 3 addr conversion is failed. - if (commuted) + if (Commuted) return false; // If there is one more use of regB later in the same MBB, consider diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 2912bdd63426..02341b4d66b8 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -163,7 +163,6 @@ class VirtRegRewriter : public MachineFunctionPass { SlotIndexes *Indexes; LiveIntervals *LIS; VirtRegMap *VRM; - SparseSet<unsigned> PhysRegs; void rewrite(); void addMBBLiveIns(); @@ -319,54 +318,15 @@ void VirtRegRewriter::rewrite() { SmallVector<unsigned, 8> SuperDeads; SmallVector<unsigned, 8> SuperDefs; SmallVector<unsigned, 8> SuperKills; - SmallPtrSet<const MachineInstr *, 4> NoReturnInsts; - - // Here we have a SparseSet to hold which PhysRegs are actually encountered - // in the MF we are about to iterate over so that later when we call - // setPhysRegUsed, we are only doing it for physRegs that were actually found - // in the program and not for all of the possible physRegs for the given - // target architecture. 
If the target has a lot of physRegs, then for a small - // program there will be a significant compile time reduction here. - PhysRegs.clear(); - PhysRegs.setUniverse(TRI->getNumRegs()); - - // The function with uwtable should guarantee that the stack unwinder - // can unwind the stack to the previous frame. Thus, we can't apply the - // noreturn optimization if the caller function has uwtable attribute. - bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable); for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { DEBUG(MBBI->print(dbgs(), Indexes)); - bool IsExitBB = MBBI->succ_empty(); for (MachineBasicBlock::instr_iterator MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { MachineInstr *MI = MII; ++MII; - // Check if this instruction is a call to a noreturn function. If this - // is a call to noreturn function and we don't need the stack unwinding - // functionality (i.e. this function does not have uwtable attribute and - // the callee function has the nounwind attribute), then we can ignore - // the definitions set by this instruction. - if (!HasUWTable && IsExitBB && MI->isCall()) { - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - MachineOperand &MO = *MOI; - if (!MO.isGlobal()) - continue; - const Function *Func = dyn_cast<Function>(MO.getGlobal()); - if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) || - // We need to keep correct unwind information - // even if the function will not return, since the - // runtime may need it. - !Func->hasFnAttribute(Attribute::NoUnwind)) - continue; - NoReturnInsts.insert(MI); - break; - } - } - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; @@ -375,15 +335,6 @@ void VirtRegRewriter::rewrite() { if (MO.isRegMask()) MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - // If we encounter a VirtReg or PhysReg then get at the PhysReg and add - // it to the physreg bitset. Later we use only the PhysRegs that were - // actually encountered in the MF to populate the MRI's used physregs. - if (MO.isReg() && MO.getReg()) - PhysRegs.insert( - TargetRegisterInfo::isVirtualRegister(MO.getReg()) ? - VRM->getPhys(MO.getReg()) : - MO.getReg()); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); @@ -470,29 +421,5 @@ void VirtRegRewriter::rewrite() { } } } - - // Tell MRI about physical registers in use. - if (NoReturnInsts.empty()) { - for (SparseSet<unsigned>::iterator - RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI) - if (!MRI->reg_nodbg_empty(*RegI)) - MRI->setPhysRegUsed(*RegI); - } else { - for (SparseSet<unsigned>::iterator - I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) { - unsigned Reg = *I; - if (MRI->reg_nodbg_empty(Reg)) - continue; - // Check if this register has a use that will impact the rest of the - // code. Uses in debug and noreturn instructions do not impact the - // generated code. - for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) { - if (!NoReturnInsts.count(&It)) { - MRI->setPhysRegUsed(Reg); - break; - } - } - } - } } diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index dbc0d91a01e2..0d26ed333ca7 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -155,7 +155,7 @@ private: // outlined but before the outlined code is pruned from the parent function. 
DenseMap<const BasicBlock *, BasicBlock *> LPadTargetBlocks; - // Map from outlined handler to call to llvm.frameaddress(1). Only used for + // Map from outlined handler to call to parent local address. Only used for // 32-bit EH. DenseMap<Function *, Value *> HandlerToParentFP; @@ -533,9 +533,9 @@ void WinEHPrepare::findSEHEHReturnPoints( BasicBlock *NextBB; Constant *Selector; if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) { - // Split the edge if there is a phi node. Returning from EH to a phi node - // is just as impossible as having a phi after an indirectbr. - if (isa<PHINode>(CatchHandler->begin())) { + // Split the edge if there are multiple predecessors. This creates a place + // where we can insert EH recovery code. + if (!CatchHandler->getSinglePredecessor()) { DEBUG(dbgs() << "splitting EH return edge from " << BB->getName() << " to " << CatchHandler->getName() << '\n'); BBI = CatchHandler = SplitCriticalEdge( @@ -616,6 +616,26 @@ void WinEHPrepare::demoteValuesLiveAcrossHandlers( // identifyEHBlocks() should have been called before this function. assert(!NormalBlocks.empty()); + // Try to avoid demoting EH pointer and selector values. They get in the way + // of our pattern matching. + SmallPtrSet<Instruction *, 10> EHVals; + for (BasicBlock &BB : F) { + LandingPadInst *LP = BB.getLandingPadInst(); + if (!LP) + continue; + EHVals.insert(LP); + for (User *U : LP->users()) { + auto *EI = dyn_cast<ExtractValueInst>(U); + if (!EI) + continue; + EHVals.insert(EI); + for (User *U2 : EI->users()) { + if (auto *PN = dyn_cast<PHINode>(U2)) + EHVals.insert(PN); + } + } + } + SetVector<Argument *> ArgsToDemote; SetVector<Instruction *> InstrsToDemote; for (BasicBlock &BB : F) { @@ -641,7 +661,11 @@ void WinEHPrepare::demoteValuesLiveAcrossHandlers( continue; } + // Don't demote EH values. auto *OpI = cast<Instruction>(Op); + if (EHVals.count(OpI)) + continue; + BasicBlock *OpBB = OpI->getParent(); // If a value is produced and consumed in the same BB, we don't need to // demote it. @@ -822,7 +846,8 @@ bool WinEHPrepare::prepareExceptionHandlers( LPad->replaceAllUsesWith(UndefValue::get(LPad->getType())); // Rewrite uses of the exception pointer to loads of an alloca. - for (Instruction *E : SEHCodeUses) { + while (!SEHCodeUses.empty()) { + Instruction *E = SEHCodeUses.pop_back_val(); SmallVector<Use *, 4> Uses; for (Use &U : E->uses()) Uses.push_back(&U); @@ -830,13 +855,10 @@ bool WinEHPrepare::prepareExceptionHandlers( auto *I = cast<Instruction>(U->getUser()); if (isa<ResumeInst>(I)) continue; - LoadInst *LI; if (auto *Phi = dyn_cast<PHINode>(I)) - LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false, - Phi->getIncomingBlock(*U)); + SEHCodeUses.push_back(Phi); else - LI = new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I); - U->set(LI); + U->set(new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I)); } E->replaceAllUsesWith(UndefValue::get(E->getType())); E->eraseFromParent(); @@ -953,16 +975,16 @@ bool WinEHPrepare::prepareExceptionHandlers( Builder.SetInsertPoint(Entry->getFirstInsertionPt()); Function *FrameEscapeFn = - Intrinsic::getDeclaration(M, Intrinsic::frameescape); + Intrinsic::getDeclaration(M, Intrinsic::localescape); Function *RecoverFrameFn = - Intrinsic::getDeclaration(M, Intrinsic::framerecover); + Intrinsic::getDeclaration(M, Intrinsic::localrecover); SmallVector<Value *, 8> AllocasToEscape; - // Scan the entry block for an existing call to llvm.frameescape. We need to + // Scan the entry block for an existing call to llvm.localescape. 
We need to // keep escaping those objects. for (Instruction &I : F.front()) { auto *II = dyn_cast<IntrinsicInst>(&I); - if (II && II->getIntrinsicID() == Intrinsic::frameescape) { + if (II && II->getIntrinsicID() == Intrinsic::localescape) { auto Args = II->arg_operands(); AllocasToEscape.append(Args.begin(), Args.end()); II->eraseFromParent(); @@ -971,7 +993,7 @@ bool WinEHPrepare::prepareExceptionHandlers( } // Finally, replace all of the temporary allocas for frame variables used in - // the outlined handlers with calls to llvm.framerecover. + // the outlined handlers with calls to llvm.localrecover. for (auto &VarInfoEntry : FrameVarInfo) { Value *ParentVal = VarInfoEntry.first; TinyPtrVector<AllocaInst *> &Allocas = VarInfoEntry.second; @@ -992,7 +1014,7 @@ bool WinEHPrepare::prepareExceptionHandlers( llvm::Value *FP = HandlerToParentFP[HandlerFn]; assert(FP); - // FIXME: Sink this framerecover into the blocks where it is used. + // FIXME: Sink this localrecover into the blocks where it is used. Builder.SetInsertPoint(TempAlloca); Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); Value *RecoverArgs[] = { @@ -1014,7 +1036,7 @@ bool WinEHPrepare::prepareExceptionHandlers( } } // End for each FrameVarInfo entry. - // Insert 'call void (...)* @llvm.frameescape(...)' at the end of the entry + // Insert 'call void (...)* @llvm.localescape(...)' at the end of the entry // block. Builder.SetInsertPoint(&F.getEntryBlock().back()); Builder.CreateCall(FrameEscapeFn, AllocasToEscape); @@ -1595,9 +1617,8 @@ void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, VMap[Extract] = SelectorValue; } -static bool isFrameAddressCall(const Value *V) { - return match(const_cast<Value *>(V), - m_Intrinsic<Intrinsic::frameaddress>(m_SpecificInt(0))); +static bool isLocalAddressCall(const Value *V) { + return match(const_cast<Value *>(V), m_Intrinsic<Intrinsic::localaddress>()); } CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( @@ -1639,9 +1660,9 @@ CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>())) return handleTypeIdFor(VMap, Inst, NewBB); - // When outlining llvm.frameaddress(i32 0), remap that to the second argument, + // When outlining llvm.localaddress(), remap that to the second argument, // which is the FP of the parent. - if (isFrameAddressCall(Inst)) { + if (isLocalAddressCall(Inst)) { VMap[Inst] = ParentFP; return CloningDirector::SkipInstruction; } @@ -1961,7 +1982,7 @@ Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) { // If we're asked to materialize a static alloca, we temporarily create an // alloca in the outlined function and add this to the FrameVarInfo map. When // all the outlining is complete, we'll replace these temporary allocas with - // calls to llvm.framerecover. + // calls to llvm.localrecover. if (auto *AV = dyn_cast<AllocaInst>(V)) { assert(AV->isStaticAlloca() && "cannot materialize un-demoted dynamic alloca"); @@ -1991,7 +2012,7 @@ void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) { // of a catch parameter, add a sentinel to the multimap to indicate that it's // used from another handler. This will prevent us from trying to sink the // alloca into the handler and ensure that the catch parameter is present in - // the call to llvm.frameescape. + // the call to llvm.localescape. 
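The hunks above and below track the intrinsic rename from llvm.frameescape / llvm.framerecover / llvm.frameaddress(i32 0) to llvm.localescape / llvm.localrecover / llvm.localaddress. A hedged sketch of the recover side of that pairing as an outlined handler would emit it; recoverParentAlloca, Parent, and ParentFP are illustrative names, not code from the patch:

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// In an outlined handler, recover escaped alloca #Idx of the parent function
// through the parent's frame pointer (obtained there via llvm.localaddress).
static Value *recoverParentAlloca(Module &M, Function &Parent, Value *ParentFP,
                                  unsigned Idx, IRBuilder<> &Builder) {
  Function *RecoverFn = Intrinsic::getDeclaration(&M, Intrinsic::localrecover);
  Value *Args[] = {ConstantExpr::getBitCast(&Parent, Builder.getInt8PtrTy()),
                   ParentFP, Builder.getInt32(Idx)};
  return Builder.CreateCall(RecoverFn, Args);
}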
FrameVarInfo[V].push_back(getCatchObjectSentinel()); } @@ -2233,16 +2254,16 @@ static void createCleanupHandler(LandingPadActions &Actions, static CallSite matchOutlinedFinallyCall(BasicBlock *BB, Instruction *MaybeCall) { // Look for finally blocks that Clang has already outlined for us. - // %fp = call i8* @llvm.frameaddress(i32 0) + // %fp = call i8* @llvm.localaddress() // call void @"fin$parent"(iN 1, i8* %fp) - if (isFrameAddressCall(MaybeCall) && MaybeCall != BB->getTerminator()) + if (isLocalAddressCall(MaybeCall) && MaybeCall != BB->getTerminator()) MaybeCall = MaybeCall->getNextNode(); CallSite FinallyCall(MaybeCall); if (!FinallyCall || FinallyCall.arg_size() != 2) return CallSite(); if (!match(FinallyCall.getArgument(0), m_SpecificInt(1))) return CallSite(); - if (!isFrameAddressCall(FinallyCall.getArgument(1))) + if (!isLocalAddressCall(FinallyCall.getArgument(1))) return CallSite(); return FinallyCall; } diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index c25ddad33b76..96bcf15e0af0 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -677,7 +677,13 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, // First calculate the address of the symbol or section as it appears // in the objct file if (Sym != Obj.symbol_end()) { - Sym->getAddress(SymAddr); + ErrorOr<uint64_t> SymAddrOrErr = Sym->getAddress(); + if (std::error_code EC = SymAddrOrErr.getError()) { + errs() << "error: failed to compute symbol address: " + << EC.message() << '\n'; + continue; + } + SymAddr = *SymAddrOrErr; // Also remember what section this symbol is in for later Sym->getSection(RSec); } else if (auto *MObj = dyn_cast<MachOObjectFile>(&Obj)) { diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt index 348308897dc4..331d2141b0e2 100644 --- a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt +++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt @@ -3,4 +3,6 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) add_llvm_library(LLVMIntelJITEvents IntelJITEventListener.cpp jitprofiling.c - ) + + LINK_LIBS pthread ${CMAKE_DL_LIBS} +) diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 907144007fdd..a131763193c0 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -113,63 +113,59 @@ void IntelJITEventListener::NotifyObjectEmitted( std::vector<LineNumberInfo> LineInfo; std::string SourceFileName; - if (Sym.getType() == SymbolRef::ST_Function) { - ErrorOr<StringRef> Name = Sym.getName(); - if (!Name) - continue; - - uint64_t Addr; - if (Sym.getAddress(Addr)) - continue; - uint64_t Size = P.second; - - // Record this address in a local vector - Functions.push_back((void*)Addr); - - // Build the function loaded notification message - iJIT_Method_Load FunctionMessage = - FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size); - if (Context) { - DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size); - DILineInfoTable::iterator Begin = Lines.begin(); - DILineInfoTable::iterator End = Lines.end(); - for (DILineInfoTable::iterator It = Begin; It != End; ++It) { - LineInfo.push_back(DILineInfoToIntelJITFormat((uintptr_t)Addr, - It->first, - It->second)); - } - if (LineInfo.size() == 0) { - FunctionMessage.source_file_name = 0; - FunctionMessage.line_number_size = 0; - FunctionMessage.line_number_table = 0; - } else { - // Source line information for the address range is provided as - // a code offset for the start of the corresponding sub-range and - // a source line. JIT API treats offsets in LineNumberInfo structures - // as the end of the corresponding code region. The start of the code - // is taken from the previous element. Need to shift the elements. 
- - LineNumberInfo last = LineInfo.back(); - last.Offset = FunctionMessage.method_size; - LineInfo.push_back(last); - for (size_t i = LineInfo.size() - 2; i > 0; --i) - LineInfo[i].LineNumber = LineInfo[i - 1].LineNumber; - - SourceFileName = Lines.front().second.FileName; - FunctionMessage.source_file_name = const_cast<char *>(SourceFileName.c_str()); - FunctionMessage.line_number_size = LineInfo.size(); - FunctionMessage.line_number_table = &*LineInfo.begin(); - } - } else { - FunctionMessage.source_file_name = 0; - FunctionMessage.line_number_size = 0; - FunctionMessage.line_number_table = 0; - } - - Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, - &FunctionMessage); - MethodIDs[(void*)Addr] = FunctionMessage.method_id; + if (Sym.getType() != SymbolRef::ST_Function) + continue; + + ErrorOr<StringRef> Name = Sym.getName(); + if (!Name) + continue; + + ErrorOr<uint64_t> AddrOrErr = Sym.getAddress(); + if (AddrOrErr.getError()) + continue; + uint64_t Addr = *AddrOrErr; + uint64_t Size = P.second; + + // Record this address in a local vector + Functions.push_back((void*)Addr); + + // Build the function loaded notification message + iJIT_Method_Load FunctionMessage = + FunctionDescToIntelJITFormat(*Wrapper, Name->data(), Addr, Size); + DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size); + DILineInfoTable::iterator Begin = Lines.begin(); + DILineInfoTable::iterator End = Lines.end(); + for (DILineInfoTable::iterator It = Begin; It != End; ++It) { + LineInfo.push_back( + DILineInfoToIntelJITFormat((uintptr_t)Addr, It->first, It->second)); } + if (LineInfo.size() == 0) { + FunctionMessage.source_file_name = 0; + FunctionMessage.line_number_size = 0; + FunctionMessage.line_number_table = 0; + } else { + // Source line information for the address range is provided as + // a code offset for the start of the corresponding sub-range and + // a source line. JIT API treats offsets in LineNumberInfo structures + // as the end of the corresponding code region. The start of the code + // is taken from the previous element. Need to shift the elements. 
+ + LineNumberInfo last = LineInfo.back(); + last.Offset = FunctionMessage.method_size; + LineInfo.push_back(last); + for (size_t i = LineInfo.size() - 2; i > 0; --i) + LineInfo[i].LineNumber = LineInfo[i - 1].LineNumber; + + SourceFileName = Lines.front().second.FileName; + FunctionMessage.source_file_name = + const_cast<char *>(SourceFileName.c_str()); + FunctionMessage.line_number_size = LineInfo.size(); + FunctionMessage.line_number_table = &*LineInfo.begin(); + } + + Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, + &FunctionMessage); + MethodIDs[(void*)Addr] = FunctionMessage.method_id; } // To support object unload notification, we need to keep a list of diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt index 1247cbd94930..afea3ecccda4 100644 --- a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt +++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt @@ -21,4 +21,4 @@ type = OptionalLibrary name = IntelJITEvents parent = ExecutionEngine -required_libraries = Core DebugInfoDWARF Support +required_libraries = Core DebugInfoDWARF Support Object ExecutionEngine diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt index e30516eb3b01..7d5550046a56 100644 --- a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt +++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt @@ -21,3 +21,4 @@ type = OptionalLibrary name = OProfileJIT parent = ExecutionEngine +required_libraries = Support Object ExecutionEngine diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index b72033805269..324d07118704 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -88,24 +88,27 @@ void OProfileJITEventListener::NotifyObjectEmitted( // Use symbol info to iterate functions in the object. 
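Both JIT event listeners are being converted from the old bool-returning SymbolRef accessors to the ErrorOr<T>-returning ones used in the loop that follows: check getError(), then dereference to get the value. A hedged sketch of that consumption pattern; dumpFunctionSymbol is an illustrative helper, not part of the patch:

#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::object;

// Print "name @ address" for a function symbol, skipping it on any error.
static void dumpFunctionSymbol(const SymbolRef &Sym, raw_ostream &OS) {
  if (Sym.getType() != SymbolRef::ST_Function)
    return;
  ErrorOr<StringRef> NameOrErr = Sym.getName();
  if (std::error_code EC = NameOrErr.getError()) {
    OS << "error: " << EC.message() << '\n';
    return;
  }
  ErrorOr<uint64_t> AddrOrErr = Sym.getAddress();
  if (AddrOrErr.getError())
    return;
  OS << *NameOrErr << " @ " << *AddrOrErr << '\n';
}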
for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) { SymbolRef Sym = P.first; - if (Sym.getType() == SymbolRef::ST_Function) { - StringRef Name; - uint64_t Addr; - if (Sym.getName(Name)) - continue; - if (Sym.getAddress(Addr)) - continue; - uint64_t Size = P.second; - - if (Wrapper->op_write_native_code(Name.data(), Addr, (void*)Addr, Size) - == -1) { - DEBUG(dbgs() << "Failed to tell OProfile about native function " - << Name << " at [" - << (void*)Addr << "-" << ((char*)Addr + Size) << "]\n"); - continue; - } - // TODO: support line number info (similar to IntelJITEventListener.cpp) + if (Sym.getType() != SymbolRef::ST_Function) + continue; + + ErrorOr<StringRef> NameOrErr = Sym.getName(); + if (NameOrErr.getError()) + continue; + StringRef Name = *NameOrErr; + ErrorOr<uint64_t> AddrOrErr = Sym.getAddress(); + if (AddrOrErr.getError()) + continue; + uint64_t Addr = *AddrOrErr; + uint64_t Size = P.second; + + if (Wrapper->op_write_native_code(Name.data(), Addr, (void *)Addr, Size) == + -1) { + DEBUG(dbgs() << "Failed to tell OProfile about native function " << Name + << " at [" << (void *)Addr << "-" << ((char *)Addr + Size) + << "]\n"); + continue; } + // TODO: support line number info (similar to IntelJITEventListener.cpp) } DebugObjects[Obj.getData().data()] = std::move(DebugObjOwner); @@ -126,8 +129,10 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) { E = DebugObj.symbol_end(); I != E; ++I) { if (I->getType() == SymbolRef::ST_Function) { - uint64_t Addr; - if (I->getAddress(Addr)) continue; + ErrorOr<uint64_t> AddrOrErr = I->getAddress(); + if (AddrOrErr.getError()) + continue; + uint64_t Addr = *AddrOrErr; if (Wrapper->op_unload_native_code(Addr) == -1) { DEBUG(dbgs() diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index fa501824e04a..93287a3a4e71 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -113,28 +113,12 @@ void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress, llvm_unreachable("Attempting to remap address of unknown section!"); } -static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) { - uint64_t Address; - if (std::error_code EC = Sym.getAddress(Address)) +static std::error_code getOffset(const SymbolRef &Sym, SectionRef Sec, + uint64_t &Result) { + ErrorOr<uint64_t> AddressOrErr = Sym.getAddress(); + if (std::error_code EC = AddressOrErr.getError()) return EC; - - if (Address == UnknownAddress) { - Result = UnknownAddress; - return std::error_code(); - } - - const ObjectFile *Obj = Sym.getObject(); - section_iterator SecI(Obj->section_begin()); - if (std::error_code EC = Sym.getSection(SecI)) - return EC; - - if (SecI == Obj->section_end()) { - Result = UnknownAddress; - return std::error_code(); - } - - uint64_t SectionAddress = SecI->getAddress(); - Result = Address - SectionAddress; + Result = *AddressOrErr - Sec.getAddress(); return std::error_code(); } @@ -184,12 +168,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { ErrorOr<StringRef> NameOrErr = I->getName(); Check(NameOrErr.getError()); StringRef Name = *NameOrErr; - uint64_t SectOffset; - Check(getOffset(*I, SectOffset)); section_iterator SI = Obj.section_end(); Check(I->getSection(SI)); if (SI == Obj.section_end()) continue; + uint64_t SectOffset; + Check(getOffset(*I, *SI, SectOffset)); StringRef SectionData; Check(SI->getContents(SectionData)); bool IsCode = SI->isText(); @@ 
-814,12 +798,16 @@ void RuntimeDyldImpl::resolveExternalSymbols() { report_fatal_error("Program used external function '" + Name + "' which could not be resolved!"); - DEBUG(dbgs() << "Resolving relocations Name: " << Name << "\t" - << format("0x%lx", Addr) << "\n"); - // This list may have been updated when we called getSymbolAddress, so - // don't change this code to get the list earlier. - RelocationList &Relocs = i->second; - resolveRelocationList(Relocs, Addr); + // If Resolver returned UINT64_MAX, the client wants to handle this symbol + // manually and we shouldn't resolve its relocations. + if (Addr != UINT64_MAX) { + DEBUG(dbgs() << "Resolving relocations Name: " << Name << "\t" + << format("0x%lx", Addr) << "\n"); + // This list may have been updated when we called getSymbolAddress, so + // don't change this code to get the list earlier. + RelocationList &Relocs = i->second; + resolveRelocationList(Relocs, Addr); + } } ExternalSymbolRelocations.erase(i); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index 957571b092da..ae199b720223 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -727,7 +727,9 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix, } bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const { - return getRTDyld().getSymbolLocalAddress(Symbol) != nullptr; + if (getRTDyld().getSymbolLocalAddress(Symbol)) + return true; + return !!getRTDyld().Resolver.findSymbol(Symbol); } uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index f5069c005857..3787950b3b08 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -511,11 +511,54 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, Insn |= Value & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; - case ELF::R_MIPS_PC32: + case ELF::R_MIPS_PC32: { + uint32_t FinalAddress = (Section.LoadAddress + Offset); + writeBytesUnaligned(Value - FinalAddress, (uint8_t *)TargetPtr, 4); + break; + } + case ELF::R_MIPS_PC16: { + uint32_t FinalAddress = (Section.LoadAddress + Offset); + Insn &= 0xffff0000; + Insn |= ((Value - FinalAddress) >> 2) & 0xffff; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } + case ELF::R_MIPS_PC19_S2: { + uint32_t FinalAddress = (Section.LoadAddress + Offset); + Insn &= 0xfff80000; + Insn |= ((Value - (FinalAddress & ~0x3)) >> 2) & 0x7ffff; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } + case ELF::R_MIPS_PC21_S2: { + uint32_t FinalAddress = (Section.LoadAddress + Offset); + Insn &= 0xffe00000; + Insn |= ((Value - FinalAddress) >> 2) & 0x1fffff; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } + case ELF::R_MIPS_PC26_S2: { + uint32_t FinalAddress = (Section.LoadAddress + Offset); + Insn &= 0xfc000000; + Insn |= ((Value - FinalAddress) >> 2) & 0x3ffffff; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } + case ELF::R_MIPS_PCHI16: { uint32_t FinalAddress = (Section.LoadAddress + Offset); - writeBytesUnaligned(Value + Addend - FinalAddress, (uint8_t *)TargetPtr, 4); + Insn &= 0xffff0000; + Insn |= ((Value - FinalAddress + 0x8000) >> 16) & 0xffff; + writeBytesUnaligned(Insn, TargetPtr, 4); break; } + case ELF::R_MIPS_PCLO16: { + uint32_t FinalAddress = 
(Section.LoadAddress + Offset); + Insn &= 0xffff0000; + Insn |= (Value - FinalAddress) & 0xffff; + writeBytesUnaligned(Insn, TargetPtr, 4); + break; + } + } } void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { @@ -1263,12 +1306,24 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( Section.StubOffset += getMaxStubSize(); } } else { - if (RelType == ELF::R_MIPS_HI16) + // FIXME: Calculate correct addends for R_MIPS_HI16, R_MIPS_LO16, + // R_MIPS_PCHI16 and R_MIPS_PCLO16 relocations. + if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16) Value.Addend += (Opcode & 0x0000ffff) << 16; else if (RelType == ELF::R_MIPS_LO16) Value.Addend += (Opcode & 0x0000ffff); else if (RelType == ELF::R_MIPS_32) Value.Addend += Opcode; + else if (RelType == ELF::R_MIPS_PCLO16) + Value.Addend += SignExtend32<16>((Opcode & 0x0000ffff)); + else if (RelType == ELF::R_MIPS_PC16) + Value.Addend += SignExtend32<18>((Opcode & 0x0000ffff) << 2); + else if (RelType == ELF::R_MIPS_PC19_S2) + Value.Addend += SignExtend32<21>((Opcode & 0x0007ffff) << 2); + else if (RelType == ELF::R_MIPS_PC21_S2) + Value.Addend += SignExtend32<23>((Opcode & 0x001fffff) << 2); + else if (RelType == ELF::R_MIPS_PC26_S2) + Value.Addend += SignExtend32<28>((Opcode & 0x03ffffff) << 2); processSimpleRelocation(SectionID, Offset, RelType, Value); } } else if (IsMipsN64ABI) { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 74b13d60a984..c0741141757c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -89,19 +89,11 @@ RelocationValueRef RuntimeDyldMachO::getRelocationValueRef( } void RuntimeDyldMachO::makeValueAddendPCRel(RelocationValueRef &Value, - const ObjectFile &BaseTObj, const relocation_iterator &RI, unsigned OffsetToNextPC) { - const MachOObjectFile &Obj = - static_cast<const MachOObjectFile &>(BaseTObj); - MachO::any_relocation_info RelInfo = - Obj.getRelocation(RI->getRawDataRefImpl()); - - bool IsPCRel = Obj.getAnyRelocationPCRel(RelInfo); - if (IsPCRel) { - ErrorOr<uint64_t> RelocAddr = RI->getAddress(); - Value.Offset += *RelocAddr + OffsetToNextPC; - } + auto &O = *cast<MachOObjectFile>(RI->getObject()); + section_iterator SecI = O.getRelocationRelocatedSection(RI); + Value.Offset += RI->getOffset() + OffsetToNextPC + SecI->getAddress(); } void RuntimeDyldMachO::dumpRelocationToResolve(const RelocationEntry &RE, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 36ba8d1b93e7..0d7364f78597 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -95,7 +95,6 @@ protected: /// Make the RelocationValueRef addend PC-relative. 
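The new MIPS PC-relative cases above recover each addend by masking the immediate field out of the instruction word, scaling it back to a byte offset, and sign-extending from the widened bit count (the 16-bit R_MIPS_PC16 field becomes an 18-bit byte offset, the 19-bit field a 21-bit offset, and so on). A small hedged sketch of that decode step; decodePC16Addend is an illustrative name, not a function in the patch:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Decode the signed byte offset carried by a 16-bit, word-scaled branch field,
// using the same mask / shift / SignExtend32 sequence as R_MIPS_PC16 above.
static int32_t decodePC16Addend(uint32_t Insn) {
  uint32_t Field = Insn & 0x0000ffff;        // 16-bit immediate field
  return llvm::SignExtend32<18>(Field << 2); // scaled to bytes, then signed
}

// For instance, a field of 0xffff (-1 words) decodes to a -4 byte offset.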
void makeValueAddendPCRel(RelocationValueRef &Value, - const ObjectFile &BaseTObj, const relocation_iterator &RI, unsigned OffsetToNextPC); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h index 99fd6e333b47..7bf764114bae 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h @@ -284,7 +284,7 @@ public: bool IsExtern = Obj.getPlainRelocationExternal(RelInfo); if (!IsExtern && RE.IsPCRel) - makeValueAddendPCRel(Value, Obj, RelI, 1 << RE.Size); + makeValueAddendPCRel(Value, RelI, 1 << RE.Size); RE.Addend = Value.Offset; diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index 0d9445e84f09..0a24bb2f5eae 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -74,7 +74,7 @@ public: getRelocationValueRef(Obj, RelI, RE, ObjSectionToID)); if (RE.IsPCRel) - makeValueAddendPCRel(Value, Obj, RelI, 8); + makeValueAddendPCRel(Value, RelI, 8); if ((RE.RelType & 0xf) == MachO::ARM_RELOC_BR24) processBranchRelocation(RE, Value, Stubs); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h index aceb304abb1e..569a078d7f3d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h @@ -68,7 +68,7 @@ public: // Value.Addend += RelocAddr + 4; // } if (RE.IsPCRel) - makeValueAddendPCRel(Value, Obj, RelI, 1 << RE.Size); + makeValueAddendPCRel(Value, RelI, 1 << RE.Size); RE.Addend = Value.Offset; diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h index 4b3b01ba3c96..dd56e72f9144 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h @@ -50,7 +50,7 @@ public: bool IsExtern = Obj.getPlainRelocationExternal(RelInfo); if (!IsExtern && RE.IsPCRel) - makeValueAddendPCRel(Value, Obj, RelI, 1 << RE.Size); + makeValueAddendPCRel(Value, RelI, 1 << RE.Size); if (RE.RelType == MachO::X86_64_RELOC_GOT || RE.RelType == MachO::X86_64_RELOC_GOT_LOAD) diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index c3032f4ffc79..546a98670a29 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -190,6 +190,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "sanitize_address"; if (hasAttribute(Attribute::AlwaysInline)) return "alwaysinline"; + if (hasAttribute(Attribute::ArgMemOnly)) + return "argmemonly"; if (hasAttribute(Attribute::Builtin)) return "builtin"; if (hasAttribute(Attribute::ByVal)) @@ -447,6 +449,9 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { llvm_unreachable("dereferenceable_or_null attribute not supported in raw " "format"); break; + case Attribute::ArgMemOnly: + llvm_unreachable("argmemonly attribute not supported in raw format"); + break; } llvm_unreachable("Unsupported attribute type"); } @@ -1356,7 +1361,8 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) { for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds; I = Attribute::AttrKind(I + 1)) { if (I == Attribute::Dereferenceable || - I == 
Attribute::DereferenceableOrNull) + I == Attribute::DereferenceableOrNull || + I == Attribute::ArgMemOnly) continue; if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) { Attrs[I] = true; diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 70a55186ea9a..f1c6ebd4846e 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -229,6 +229,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { NewFn = nullptr; bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); + assert(F != NewFn && "Intrinsic function upgraded to the same function"); // Upgrade intrinsic attributes. This does not change the function. if (NewFn) @@ -710,16 +711,14 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) { // Upgrade the function and check if it is a totaly new function. Function *NewFn; if (UpgradeIntrinsicFunction(F, NewFn)) { - if (NewFn != F) { - // Replace all uses to the old function with the new one if necessary. - for (Value::user_iterator UI = F->user_begin(), UE = F->user_end(); - UI != UE; ) { - if (CallInst *CI = dyn_cast<CallInst>(*UI++)) - UpgradeIntrinsicCall(CI, NewFn); - } - // Remove old function, no longer used, from the module. - F->eraseFromParent(); + // Replace all uses to the old function with the new one if necessary. + for (Value::user_iterator UI = F->user_begin(), UE = F->user_end(); + UI != UE;) { + if (CallInst *CI = dyn_cast<CallInst>(*UI++)) + UpgradeIntrinsicCall(CI, NewFn); } + // Remove old function, no longer used, from the module. + F->eraseFromParent(); } } diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index 77cb10d5b6ba..0a0449434a7b 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -163,47 +163,40 @@ CallInst *BasicBlock::getTerminatingMustTailCall() { } Instruction* BasicBlock::getFirstNonPHI() { - BasicBlock::iterator i = begin(); - // All valid basic blocks should have a terminator, - // which is not a PHINode. If we have an invalid basic - // block we'll get an assertion failure when dereferencing - // a past-the-end iterator. - while (isa<PHINode>(i)) ++i; - return &*i; + for (Instruction &I : *this) + if (!isa<PHINode>(I)) + return &I; + return nullptr; } Instruction* BasicBlock::getFirstNonPHIOrDbg() { - BasicBlock::iterator i = begin(); - // All valid basic blocks should have a terminator, - // which is not a PHINode. If we have an invalid basic - // block we'll get an assertion failure when dereferencing - // a past-the-end iterator. - while (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) ++i; - return &*i; + for (Instruction &I : *this) + if (!isa<PHINode>(I) && !isa<DbgInfoIntrinsic>(I)) + return &I; + return nullptr; } Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() { - // All valid basic blocks should have a terminator, - // which is not a PHINode. If we have an invalid basic - // block we'll get an assertion failure when dereferencing - // a past-the-end iterator. 
- BasicBlock::iterator i = begin(); - for (;; ++i) { - if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i)) + for (Instruction &I : *this) { + if (isa<PHINode>(I) || isa<DbgInfoIntrinsic>(I)) continue; - const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i); - if (!II) - break; - if (II->getIntrinsicID() != Intrinsic::lifetime_start && - II->getIntrinsicID() != Intrinsic::lifetime_end) - break; + if (auto *II = dyn_cast<IntrinsicInst>(&I)) + if (II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end) + continue; + + return &I; } - return &*i; + return nullptr; } BasicBlock::iterator BasicBlock::getFirstInsertionPt() { - iterator InsertPt = getFirstNonPHI(); + Instruction *FirstNonPHI = getFirstNonPHI(); + if (!FirstNonPHI) + return end(); + + iterator InsertPt = FirstNonPHI; if (isa<LandingPadInst>(InsertPt)) ++InsertPt; return InsertPt; } diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 23e923d41126..e0e729d534bd 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -1691,6 +1691,14 @@ void LLVMDeleteFunction(LLVMValueRef Fn) { unwrap<Function>(Fn)->eraseFromParent(); } +LLVMValueRef LLVMGetPersonalityFn(LLVMValueRef Fn) { + return wrap(unwrap<Function>(Fn)->getPersonalityFn()); +} + +void LLVMSetPersonalityFn(LLVMValueRef Fn, LLVMValueRef PersonalityFn) { + unwrap<Function>(Fn)->setPersonalityFn(unwrap<Constant>(PersonalityFn)); +} + unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) { if (Function *F = dyn_cast<Function>(unwrap(Fn))) return F->getIntrinsicID(); diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 6a3ff0e8e457..2a90e70af1a3 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -73,37 +73,47 @@ void DIBuilder::trackIfUnresolved(MDNode *N) { } void DIBuilder::finalize() { - if (CUNode) { - CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes)); - - SmallVector<Metadata *, 16> RetainValues; - // Declarations and definitions of the same type may be retained. Some - // clients RAUW these pairs, leaving duplicates in the retained types - // list. Use a set to remove the duplicates while we transform the - // TrackingVHs back into Values. - SmallPtrSet<Metadata *, 16> RetainSet; - for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++) - if (RetainSet.insert(AllRetainTypes[I]).second) - RetainValues.push_back(AllRetainTypes[I]); + if (!CUNode) { + assert(!AllowUnresolvedNodes && + "creating type nodes without a CU is not supported"); + return; + } + + CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes)); + + SmallVector<Metadata *, 16> RetainValues; + // Declarations and definitions of the same type may be retained. Some + // clients RAUW these pairs, leaving duplicates in the retained types + // list. Use a set to remove the duplicates while we transform the + // TrackingVHs back into Values. 
+ SmallPtrSet<Metadata *, 16> RetainSet; + for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++) + if (RetainSet.insert(AllRetainTypes[I]).second) + RetainValues.push_back(AllRetainTypes[I]); + + if (!RetainValues.empty()) CUNode->replaceRetainedTypes(MDTuple::get(VMContext, RetainValues)); - DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms); + DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms); + if (!AllSubprograms.empty()) CUNode->replaceSubprograms(SPs.get()); - for (auto *SP : SPs) { - if (MDTuple *Temp = SP->getVariables().get()) { - const auto &PV = PreservedVariables.lookup(SP); - SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end()); - DINodeArray AV = getOrCreateArray(Variables); - TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); - } + + for (auto *SP : SPs) { + if (MDTuple *Temp = SP->getVariables().get()) { + const auto &PV = PreservedVariables.lookup(SP); + SmallVector<Metadata *, 4> Variables(PV.begin(), PV.end()); + DINodeArray AV = getOrCreateArray(Variables); + TempMDTuple(Temp)->replaceAllUsesWith(AV.get()); } + } + if (!AllGVs.empty()) CUNode->replaceGlobalVariables(MDTuple::get(VMContext, AllGVs)); + if (!AllImportedModules.empty()) CUNode->replaceImportedEntities(MDTuple::get( VMContext, SmallVector<Metadata *, 16>(AllImportedModules.begin(), AllImportedModules.end()))); - } // Now that all temp nodes have been replaced or deleted, resolve remaining // cycles. @@ -585,7 +595,7 @@ DILocalVariable *DIBuilder::createLocalVariable( DIType *Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) { // FIXME: Why getNonCompileUnitScope()? // FIXME: Why is "!Context" okay here? - // FIXME: WHy doesn't this check for a subprogram or lexical block (AFAICT + // FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT // the only valid scopes)? DIScope *Context = getNonCompileUnitScope(Scope); @@ -593,7 +603,7 @@ DILocalVariable *DIBuilder::createLocalVariable( VMContext, Tag, cast_or_null<DILocalScope>(Context), Name, File, LineNo, DITypeRef::get(Ty), ArgNo, Flags); if (AlwaysPreserve) { - // The optimizer may remove local variable. If there is an interest + // The optimizer may remove local variables. If there is an interest // to preserve variable info in such situation then stash it in a // named mdnode. DISubprogram *Fn = getDISubprogram(Scope); @@ -857,7 +867,7 @@ void DIBuilder::replaceArrays(DICompositeType *&T, DINodeArray Elements, if (!T->isResolved()) return; - // If "T" is resolved, it may be due to a self-reference cycle. Track the + // If T is resolved, it may be due to a self-reference cycle. Track the // arrays explicitly if they're unresolved, or else the cycles will be // orphaned. 
if (Elements) diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp index e3258895ea5e..b6a8bbcbe5fa 100644 --- a/lib/IR/Dominators.cpp +++ b/lib/IR/Dominators.cpp @@ -62,18 +62,14 @@ bool BasicBlockEdge::isSingleEdge() const { // //===----------------------------------------------------------------------===// -TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>); -TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>); - -#define LLVM_COMMA , -TEMPLATE_INSTANTIATION(void llvm::Calculate<Function LLVM_COMMA BasicBlock *>( - DominatorTreeBase<GraphTraits<BasicBlock *>::NodeType> &DT LLVM_COMMA - Function &F)); -TEMPLATE_INSTANTIATION( - void llvm::Calculate<Function LLVM_COMMA Inverse<BasicBlock *> >( - DominatorTreeBase<GraphTraits<Inverse<BasicBlock *> >::NodeType> &DT - LLVM_COMMA Function &F)); -#undef LLVM_COMMA +template class llvm::DomTreeNodeBase<BasicBlock>; +template class llvm::DominatorTreeBase<BasicBlock>; + +template void llvm::Calculate<Function, BasicBlock *>( + DominatorTreeBase<GraphTraits<BasicBlock *>::NodeType> &DT, Function &F); +template void llvm::Calculate<Function, Inverse<BasicBlock *>>( + DominatorTreeBase<GraphTraits<Inverse<BasicBlock *>>::NodeType> &DT, + Function &F); // dominates - Return true if Def dominates a use in User. This performs // the special checks necessary if Def and User are in the same basic block. diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 78d1adb5e700..f554d590284f 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -39,8 +39,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// // Value Class //===----------------------------------------------------------------------===// -const unsigned Value::NumUserOperandsBits; - static inline Type *checkType(Type *Ty) { assert(Ty && "Value defined with a null type: Error!"); return Ty; diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 3c61165768f8..2a0a4ff393ed 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -184,12 +184,12 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport { /// \brief Track unresolved string-based type references. SmallDenseMap<const MDString *, const MDNode *, 32> UnresolvedTypeRefs; - /// \brief Whether we've seen a call to @llvm.frameescape in this function + /// \brief Whether we've seen a call to @llvm.localescape in this function /// already. bool SawFrameEscape; - /// Stores the count of how many objects were passed to llvm.frameescape for a - /// given function and the largest index passed to llvm.framerecover. + /// Stores the count of how many objects were passed to llvm.localescape for a + /// given function and the largest index passed to llvm.localrecover. 
DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo; public: @@ -438,6 +438,9 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) { Assert(GVar && GVar->getValueType()->isArrayTy(), "Only global arrays can have appending linkage!", GVar); } + + if (GV.isDeclarationForLinker()) + Assert(!GV.hasComdat(), "Declaration may not be in a Comdat!", &GV); } void Verifier::visitGlobalVariable(const GlobalVariable &GV) { @@ -1270,7 +1273,8 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, I->getKindAsEnum() == Attribute::Cold || I->getKindAsEnum() == Attribute::OptimizeNone || I->getKindAsEnum() == Attribute::JumpTable || - I->getKindAsEnum() == Attribute::Convergent) { + I->getKindAsEnum() == Attribute::Convergent || + I->getKindAsEnum() == Attribute::ArgMemOnly) { if (!isFunction) { CheckFailed("Attribute '" + I->getAsString() + "' only applies to functions!", V); @@ -1528,8 +1532,9 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) { const Instruction &CI = *CS.getInstruction(); - Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory(), - "gc.statepoint must read and write memory to preserve " + Assert(!CS.doesNotAccessMemory() && !CS.onlyReadsMemory() && + !CS.onlyAccessesArgMemory(), + "gc.statepoint must read and write all memory to preserve " "reordering restrictions required by safepoint semantics", &CI); @@ -1666,8 +1671,8 @@ void Verifier::verifyFrameRecoverIndices() { unsigned EscapedObjectCount = Counts.second.first; unsigned MaxRecoveredIndex = Counts.second.second; Assert(MaxRecoveredIndex <= EscapedObjectCount, - "all indices passed to llvm.framerecover must be less than the " - "number of arguments passed ot llvm.frameescape in the parent " + "all indices passed to llvm.localrecover must be less than the " + "number of arguments passed ot llvm.localescape in the parent " "function", F); } @@ -2535,10 +2540,6 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { Assert(isa<PointerType>(TargetTy), "GEP base pointer is not a vector or a vector of pointers", &GEP); Assert(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP); - Assert(GEP.getPointerOperandType()->isVectorTy() == - GEP.getType()->isVectorTy(), - "Vector GEP must return a vector value", &GEP); - SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end()); Type *ElTy = GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); @@ -2548,17 +2549,20 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { GEP.getResultElementType() == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); - if (GEP.getPointerOperandType()->isVectorTy()) { + if (GEP.getType()->isVectorTy()) { // Additional checks for vector GEPs. 
- unsigned GepWidth = GEP.getPointerOperandType()->getVectorNumElements(); - Assert(GepWidth == GEP.getType()->getVectorNumElements(), - "Vector GEP result width doesn't match operand's", &GEP); + unsigned GEPWidth = GEP.getType()->getVectorNumElements(); + if (GEP.getPointerOperandType()->isVectorTy()) + Assert(GEPWidth == GEP.getPointerOperandType()->getVectorNumElements(), + "Vector GEP result width doesn't match operand's", &GEP); for (unsigned i = 0, e = Idxs.size(); i != e; ++i) { Type *IndexTy = Idxs[i]->getType(); - Assert(IndexTy->isVectorTy(), "Vector GEP must have vector indices!", - &GEP); - unsigned IndexWidth = IndexTy->getVectorNumElements(); - Assert(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP); + if (IndexTy->isVectorTy()) { + unsigned IndexWidth = IndexTy->getVectorNumElements(); + Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP); + } + Assert(IndexTy->getScalarType()->isIntegerTy(), + "All GEP indices should be of integer type"); } } visitInstruction(GEP); @@ -3276,32 +3280,32 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "llvm.invariant.end parameter #2 must be a constant integer", CS); break; - case Intrinsic::frameescape: { + case Intrinsic::localescape: { BasicBlock *BB = CS.getParent(); Assert(BB == &BB->getParent()->front(), - "llvm.frameescape used outside of entry block", CS); + "llvm.localescape used outside of entry block", CS); Assert(!SawFrameEscape, - "multiple calls to llvm.frameescape in one function", CS); + "multiple calls to llvm.localescape in one function", CS); for (Value *Arg : CS.args()) { if (isa<ConstantPointerNull>(Arg)) continue; // Null values are allowed as placeholders. auto *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts()); Assert(AI && AI->isStaticAlloca(), - "llvm.frameescape only accepts static allocas", CS); + "llvm.localescape only accepts static allocas", CS); } FrameEscapeInfo[BB->getParent()].first = CS.getNumArgOperands(); SawFrameEscape = true; break; } - case Intrinsic::framerecover: { + case Intrinsic::localrecover: { Value *FnArg = CS.getArgOperand(0)->stripPointerCasts(); Function *Fn = dyn_cast<Function>(FnArg); Assert(Fn && !Fn->isDeclaration(), - "llvm.framerecover first " + "llvm.localrecover first " "argument must be function defined in this module", CS); auto *IdxArg = dyn_cast<ConstantInt>(CS.getArgOperand(2)); - Assert(IdxArg, "idx argument of llvm.framerecover must be a constant int", + Assert(IdxArg, "idx argument of llvm.localrecover must be a constant int", CS); auto &Entry = FrameEscapeInfo[Fn]; Entry.second = unsigned( diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 6131c3180249..53ed4175f8e3 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -473,6 +473,9 @@ void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def, if (def->hasComdat()) attr |= LTO_SYMBOL_COMDAT; + if (isa<GlobalAlias>(def)) + attr |= LTO_SYMBOL_ALIAS; + auto Iter = _defines.insert(Name).first; // fill information structure diff --git a/lib/LibDriver/LibDriver.cpp b/lib/LibDriver/LibDriver.cpp index cb3278c716e6..b33a22ff0cf8 100644 --- a/lib/LibDriver/LibDriver.cpp +++ b/lib/LibDriver/LibDriver.cpp @@ -56,17 +56,13 @@ public: } -static std::string getOutputPath(llvm::opt::InputArgList *Args) { +static std::string getOutputPath(llvm::opt::InputArgList *Args, + const llvm::NewArchiveIterator &FirstMember) { if (auto *Arg = Args->getLastArg(OPT_out)) return Arg->getValue(); - for (auto *Arg : Args->filtered(OPT_INPUT)) { - if 
(!StringRef(Arg->getValue()).endswith_lower(".obj")) - continue; - SmallString<128> Val = StringRef(Arg->getValue()); - llvm::sys::path::replace_extension(Val, ".lib"); - return Val.str(); - } - llvm_unreachable("internal error"); + SmallString<128> Val = FirstMember.getNew(); + llvm::sys::path::replace_extension(Val, ".lib"); + return Val.str(); } static std::vector<StringRef> getSearchPaths(llvm::opt::InputArgList *Args, @@ -144,7 +140,10 @@ int llvm::libDriverMain(llvm::ArrayRef<const char*> ArgsArr) { } std::pair<StringRef, std::error_code> Result = - llvm::writeArchive(getOutputPath(&Args), Members, /*WriteSymtab=*/true); + llvm::writeArchive(getOutputPath(&Args, Members[0]), Members, + /*WriteSymtab=*/true, object::Archive::K_GNU, + /*Deterministic*/ true); + if (Result.second) { if (Result.first.empty()) Result.first = ArgsArr[0]; diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 13c5ca9561df..6554d6a9e60e 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -28,6 +28,7 @@ add_llvm_library(LLVMMC MCObjectStreamer.cpp MCObjectWriter.cpp MCRegisterInfo.cpp + MCSchedule.cpp MCSection.cpp MCSectionCOFF.cpp MCSectionELF.cpp diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 9a65a3158972..227c937e8d1b 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -503,7 +503,8 @@ void MCAsmStreamer::EndCOFFSymbolDef() { } void MCAsmStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { - OS << "\t.safeseh\t" << *Symbol; + OS << "\t.safeseh\t"; + Symbol->print(OS, MAI); EmitEOL(); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index da6516a4ac92..f53b589e1aea 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -925,7 +925,7 @@ void MCAssembler::Finish() { Fixups = FragWithFixups->getFixups(); Contents = FragWithFixups->getContents(); } else - llvm_unreachable("Unknow fragment with fixups!"); + llvm_unreachable("Unknown fragment with fixups!"); for (const MCFixup &Fixup : Fixups) { uint64_t FixedValue; bool IsPCRel; diff --git a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp index 68948d36d65c..5fc2ca44f5d4 100644 --- a/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp +++ b/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp @@ -16,6 +16,10 @@ using namespace llvm; +namespace llvm { +class Triple; +} + // This function tries to add a symbolic operand in place of the immediate // Value in the MCInst. The immediate Value has had any PC adjustment made by // the caller. 
If the instruction is a branch instruction then IsBranch is true, @@ -184,7 +188,7 @@ void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream, } namespace llvm { -MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, +MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo, MCContext *Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo) { diff --git a/lib/MC/MCInstrDesc.cpp b/lib/MC/MCInstrDesc.cpp index decc2d84b252..5be2fa1b30b6 100644 --- a/lib/MC/MCInstrDesc.cpp +++ b/lib/MC/MCInstrDesc.cpp @@ -19,7 +19,7 @@ using namespace llvm; -bool MCInstrDesc::getDeprecatedInfo(MCInst &MI, MCSubtargetInfo &STI, +bool MCInstrDesc::getDeprecatedInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) const { if (ComplexDeprecationInfo) return ComplexDeprecationInfo(MI, STI, Info); diff --git a/lib/MC/MCSchedule.cpp b/lib/MC/MCSchedule.cpp new file mode 100644 index 000000000000..f3919427bf05 --- /dev/null +++ b/lib/MC/MCSchedule.cpp @@ -0,0 +1,34 @@ +//===- MCSchedule.cpp - Scheduling ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the default scheduling model. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSchedule.h" +#include <type_traits> + +using namespace llvm; + +static_assert(std::is_pod<MCSchedModel>::value, + "We shouldn't have a static constructor here"); +const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth, + DefaultMicroOpBufferSize, + DefaultLoopMicroOpBufferSize, + DefaultLoadLatency, + DefaultHighLatency, + DefaultMispredictPenalty, + false, + true, + 0, + nullptr, + nullptr, + 0, + 0, + nullptr}; diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index ece775c4f08f..9210cf544b16 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -17,42 +17,34 @@ using namespace llvm; -/// InitMCProcessorInfo - Set or change the CPU (optionally supplemented -/// with feature string). Recompute feature bits and scheduling model. 
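The static_assert in the new MCSchedule.cpp pins down why the default model can be a file-scope constant: a const POD aggregate is constant-initialized, so no static constructor runs before main(). A small illustration of the same property with a made-up struct:

#include <type_traits>

struct Config { unsigned Width; bool Enabled; }; // plain aggregate, no constructors
static_assert(std::is_pod<Config>::value,
              "a non-trivial member would silently require a static constructor");

const Config DefaultConfig = {4, true}; // constant-initialized, no startup code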
-void -MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { +static FeatureBitset getFeatures(StringRef CPU, StringRef FS, + ArrayRef<SubtargetFeatureKV> ProcDesc, + ArrayRef<SubtargetFeatureKV> ProcFeatures) { SubtargetFeatures Features(FS); - FeatureBits = Features.getFeatureBits(CPU, ProcDesc, ProcFeatures); - InitCPUSchedModel(CPU); + return Features.getFeatureBits(CPU, ProcDesc, ProcFeatures); } -void -MCSubtargetInfo::InitCPUSchedModel(StringRef CPU) { +void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { + FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures); if (!CPU.empty()) - CPUSchedModel = getSchedModelForCPU(CPU); + CPUSchedModel = &getSchedModelForCPU(CPU); else - CPUSchedModel = MCSchedModel::GetDefaultSchedModel(); + CPUSchedModel = &MCSchedModel::GetDefaultSchedModel(); } -void MCSubtargetInfo::InitMCSubtargetInfo( +void MCSubtargetInfo::setDefaultFeatures(StringRef CPU) { + FeatureBits = getFeatures(CPU, "", ProcDesc, ProcFeatures); +} + +MCSubtargetInfo::MCSubtargetInfo( const Triple &TT, StringRef C, StringRef FS, ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD, const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, - const InstrStage *IS, const unsigned *OC, const unsigned *FP) { - TargetTriple = TT; - CPU = C; - ProcFeatures = PF; - ProcDesc = PD; - ProcSchedModels = ProcSched; - WriteProcResTable = WPR; - WriteLatencyTable = WL; - ReadAdvanceTable = RA; - - Stages = IS; - OperandCycles = OC; - ForwardingPaths = FP; - + const InstrStage *IS, const unsigned *OC, const unsigned *FP) + : TargetTriple(TT), CPU(C), ProcFeatures(PF), ProcDesc(PD), + ProcSchedModels(ProcSched), WriteProcResTable(WPR), WriteLatencyTable(WL), + ReadAdvanceTable(RA), Stages(IS), OperandCycles(OC), ForwardingPaths(FP) { InitMCProcessorInfo(CPU, FS); } @@ -82,8 +74,7 @@ FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) { return FeatureBits; } -MCSchedModel -MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { +const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { assert(ProcSchedModels && "Processor machine model not available!"); unsigned NumProcs = ProcDesc.size(); @@ -116,6 +107,6 @@ MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const { /// Initialize an InstrItineraryData instance. void MCSubtargetInfo::initInstrItins(InstrItineraryData &InstrItins) const { - InstrItins = - InstrItineraryData(CPUSchedModel, Stages, OperandCycles, ForwardingPaths); + InstrItins = InstrItineraryData(getSchedModel(), Stages, OperandCycles, + ForwardingPaths); } diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index affc57471fdb..125380a9d140 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -19,9 +19,6 @@ using namespace llvm; // Sentinel value for the absolute pseudo section. MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast<MCSection *>(1); -const unsigned MCSymbol::NumCommonAlignmentBits; -const unsigned MCSymbol::NumFlagsBits; - void *MCSymbol::operator new(size_t s, const StringMapEntry<bool> *Name, MCContext &Ctx) { // We may need more space for a Name to account for alignment. 
So allocate diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 54ed954a90d9..d4821196a6cf 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" using namespace llvm; using namespace object; @@ -115,6 +116,23 @@ uint64_t Archive::Child::getRawSize() const { return getHeader()->getSize(); } +ErrorOr<StringRef> Archive::Child::getBuffer() const { + if (!Parent->IsThin) + return StringRef(Data.data() + StartOfFile, getSize()); + ErrorOr<StringRef> Name = getName(); + if (std::error_code EC = Name.getError()) + return EC; + SmallString<128> FullName = + Parent->getMemoryBufferRef().getBufferIdentifier(); + sys::path::remove_filename(FullName); + sys::path::append(FullName, *Name); + ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); + if (std::error_code EC = Buf.getError()) + return EC; + Parent->ThinBuffers.push_back(std::move(*Buf)); + return Parent->ThinBuffers.back()->getBuffer(); +} + Archive::Child Archive::Child::getNext() const { size_t SpaceToSkip = Data.size(); // If it's odd, add 1 to make it even. @@ -162,10 +180,10 @@ ErrorOr<StringRef> Archive::Child::getName() const { + Parent->StringTable->getSize())) return object_error::parse_failed; - // GNU long file names end with a /. + // GNU long file names end with a "/\n". if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) { - StringRef::size_type End = StringRef(addr).find('/'); - return StringRef(addr, End); + StringRef::size_type End = StringRef(addr).find('\n'); + return StringRef(addr, End - 1); } return StringRef(addr); } else if (name.startswith("#1/")) { @@ -186,7 +204,10 @@ ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { if (std::error_code EC = NameOrErr.getError()) return EC; StringRef Name = NameOrErr.get(); - return MemoryBufferRef(getBuffer(), Name); + ErrorOr<StringRef> Buf = getBuffer(); + if (std::error_code EC = Buf.getError()) + return EC; + return MemoryBufferRef(*Buf, Name); } ErrorOr<std::unique_ptr<Binary>> @@ -207,7 +228,8 @@ ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { } Archive::Archive(MemoryBufferRef Source, std::error_code &ec) - : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()) { + : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()), + StringTable(child_end()), FirstRegular(child_end()) { StringRef Buffer = Data.getBuffer(); // Check for sufficient magic. if (Buffer.startswith(ThinMagic)) { @@ -287,7 +309,7 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) ++i; if (i == e) { - ec = object_error::parse_failed; + ec = std::error_code(); return; } Name = i->getRawName(); @@ -352,11 +374,11 @@ Archive::child_iterator Archive::child_end() const { } StringRef Archive::Symbol::getName() const { - return Parent->SymbolTable->getBuffer().begin() + StringIndex; + return Parent->getSymbolTable().begin() + StringIndex; } ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const { - const char *Buf = Parent->SymbolTable->getBuffer().begin(); + const char *Buf = Parent->getSymbolTable().begin(); const char *Offsets = Buf; if (Parent->kind() == K_MIPS64) Offsets += sizeof(uint64_t); @@ -420,7 +442,7 @@ Archive::Symbol Archive::Symbol::getNext() const { // and the second being the offset into the archive of the member that // define the symbol. 
After that the next uint32_t is the byte count of // the string table followed by the string table. - const char *Buf = Parent->SymbolTable->getBuffer().begin(); + const char *Buf = Parent->getSymbolTable().begin(); uint32_t RanlibCount = 0; RanlibCount = read32le(Buf) / 8; // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) @@ -437,8 +459,7 @@ Archive::Symbol Archive::Symbol::getNext() const { } } else { // Go to one past next null. - t.StringIndex = - Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1; + t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1; } ++t.SymbolIndex; return t; @@ -448,7 +469,7 @@ Archive::symbol_iterator Archive::symbol_begin() const { if (!hasSymbolTable()) return symbol_iterator(Symbol(this, 0, 0)); - const char *buf = SymbolTable->getBuffer().begin(); + const char *buf = getSymbolTable().begin(); if (kind() == K_GNU) { uint32_t symbol_count = 0; symbol_count = read32be(buf); @@ -480,7 +501,7 @@ Archive::symbol_iterator Archive::symbol_begin() const { symbol_count = read32le(buf); buf += 4 + (symbol_count * 2); // Skip indices. } - uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin(); + uint32_t string_start_offset = buf - getSymbolTable().begin(); return symbol_iterator(Symbol(this, 0, string_start_offset)); } @@ -491,7 +512,7 @@ Archive::symbol_iterator Archive::symbol_end() const { } uint32_t Archive::getNumberOfSymbols() const { - const char *buf = SymbolTable->getBuffer().begin(); + const char *buf = getSymbolTable().begin(); if (kind() == K_GNU) return read32be(buf); if (kind() == K_MIPS64) diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index 00a56d13bfed..a40901c924ea 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -34,8 +34,6 @@ using namespace llvm; -NewArchiveIterator::NewArchiveIterator() {} - NewArchiveIterator::NewArchiveIterator(object::Archive::child_iterator I, StringRef Name) : IsNewMember(false), Name(Name), OldI(I) {} @@ -93,8 +91,12 @@ static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size, } } -static void print32BE(raw_ostream &Out, uint32_t Val) { - support::endian::Writer<support::big>(Out).write(Val); +static void print32(raw_ostream &Out, object::Archive::Kind Kind, + uint32_t Val) { + if (Kind == object::Archive::K_GNU) + support::endian::Writer<support::big>(Out).write(Val); + else + support::endian::Writer<support::little>(Out).write(Val); } static void printRestOfMemberHeader(raw_fd_ostream &Out, @@ -109,18 +111,42 @@ static void printRestOfMemberHeader(raw_fd_ostream &Out, Out << "`\n"; } -static void printMemberHeader(raw_fd_ostream &Out, StringRef Name, - const sys::TimeValue &ModTime, unsigned UID, - unsigned GID, unsigned Perms, unsigned Size) { +static void printGNUSmallMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimeValue &ModTime, + unsigned UID, unsigned GID, + unsigned Perms, unsigned Size) { printWithSpacePadding(Out, Twine(Name) + "/", 16); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); } -static void printMemberHeader(raw_fd_ostream &Out, unsigned NameOffset, - const sys::TimeValue &ModTime, unsigned UID, - unsigned GID, unsigned Perms, unsigned Size) { +static void printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, + const sys::TimeValue &ModTime, unsigned UID, + unsigned GID, unsigned Perms, unsigned Size) { + uint64_t PosAfterHeader = Out.tell() + 60 + Name.size(); + // Pad so that even 64 bit object files are aligned. 
+ unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); + unsigned NameWithPadding = Name.size() + Pad; + printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16); + printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, + NameWithPadding + Size); + Out << Name; + assert(PosAfterHeader == Out.tell()); + while (Pad--) + Out.write(uint8_t(0)); +} + +static void +printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, + StringRef Name, + std::vector<unsigned>::iterator &StringMapIndexIter, + const sys::TimeValue &ModTime, unsigned UID, unsigned GID, + unsigned Perms, unsigned Size) { + if (Kind == object::Archive::K_BSD) + return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); + if (Name.size() < 16) + return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); Out << '/'; - printWithSpacePadding(Out, NameOffset, 15); + printWithSpacePadding(Out, *StringMapIndexIter++, 15); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); } @@ -152,20 +178,26 @@ static void writeStringTable(raw_fd_ostream &Out, Out.seek(Pos); } +static sys::TimeValue now(bool Deterministic) { + if (!Deterministic) + return sys::TimeValue::now(); + sys::TimeValue TV; + TV.fromEpochTime(0); + return TV; +} + // Returns the offset of the first reference to a member offset. static ErrorOr<unsigned> -writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members, +writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, + ArrayRef<NewArchiveIterator> Members, ArrayRef<MemoryBufferRef> Buffers, - std::vector<unsigned> &MemberOffsetRefs) { - unsigned StartOffset = 0; - unsigned MemberNum = 0; - std::string NameBuf; - raw_string_ostream NameOS(NameBuf); - unsigned NumSyms = 0; + std::vector<unsigned> &MemberOffsetRefs, bool Deterministic) { + unsigned HeaderStartOffset = 0; + unsigned BodyStartOffset = 0; + SmallString<128> NameBuf; + raw_svector_ostream NameOS(NameBuf); LLVMContext Context; - for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(), - E = Members.end(); - I != E; ++I, ++MemberNum) { + for (unsigned MemberNum = 0, N = Members.size(); MemberNum < N; ++MemberNum) { MemoryBufferRef MemberBuffer = Buffers[MemberNum]; ErrorOr<std::unique_ptr<object::SymbolicFile>> ObjOrErr = object::SymbolicFile::createSymbolicFile( @@ -174,10 +206,14 @@ writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members, continue; // FIXME: check only for "not an object file" errors. 
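printBSDMemberHeader above stores long names with the BSD "#1/<length>" convention: the name is written immediately after the fixed 60-byte header and padded so that the member data which follows stays 8-byte aligned, and the header's size field covers name, padding and data together. A hypothetical helper (not part of ArchiveWriter.cpp) that computes the length written after "#1/":

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MathExtras.h"

// PosAfterName is the offset where member data would start with no padding,
// i.e. current position + 60-byte header + unpadded name length.
static unsigned bsdPaddedNameLength(llvm::StringRef Name, uint64_t PosAfterName) {
  unsigned Pad = unsigned(llvm::OffsetToAlignment(PosAfterName, 8)); // 0..7 zero bytes
  return unsigned(Name.size()) + Pad;
}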
object::SymbolicFile &Obj = *ObjOrErr.get(); - if (!StartOffset) { - printMemberHeader(Out, "", sys::TimeValue::now(), 0, 0, 0, 0); - StartOffset = Out.tell(); - print32BE(Out, 0); + if (!HeaderStartOffset) { + HeaderStartOffset = Out.tell(); + if (Kind == object::Archive::K_GNU) + printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0); + else + printBSDMemberHeader(Out, "__.SYMDEF", now(Deterministic), 0, 0, 0, 0); + BodyStartOffset = Out.tell(); + print32(Out, Kind, 0); // number of entries or bytes } for (const object::BasicSymbolRef &S : Obj.symbols()) { @@ -188,35 +224,53 @@ writeSymbolTable(raw_fd_ostream &Out, ArrayRef<NewArchiveIterator> Members, continue; if (Symflags & object::SymbolRef::SF_Undefined) continue; + + unsigned NameOffset = NameOS.tell(); if (auto EC = S.printName(NameOS)) return EC; NameOS << '\0'; - ++NumSyms; MemberOffsetRefs.push_back(MemberNum); - print32BE(Out, 0); + if (Kind == object::Archive::K_BSD) + print32(Out, Kind, NameOffset); + print32(Out, Kind, 0); // member offset } } - Out << NameOS.str(); - if (StartOffset == 0) + if (HeaderStartOffset == 0) return 0; - if (Out.tell() % 2) - Out << '\0'; + StringRef StringTable = NameOS.str(); + if (Kind == object::Archive::K_BSD) + print32(Out, Kind, StringTable.size()); // byte count of the string table + Out << StringTable; + + // ld64 requires the next member header to start at an offset that is + // 4 bytes aligned. + unsigned Pad = OffsetToAlignment(Out.tell(), 4); + while (Pad--) + Out.write(uint8_t(0)); + // Patch up the size of the symbol table now that we know how big it is. unsigned Pos = Out.tell(); - Out.seek(StartOffset - 12); - printWithSpacePadding(Out, Pos - StartOffset, 10); - Out.seek(StartOffset); - print32BE(Out, NumSyms); + const unsigned MemberHeaderSize = 60; + Out.seek(HeaderStartOffset + 48); // offset of the size field. + printWithSpacePadding(Out, Pos - MemberHeaderSize - HeaderStartOffset, 10); + + // Patch up the number of symbols. 
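Taken together, the branches above yield two different symbol-table shapes; the member offsets are zero placeholders at this point and are patched once the real member positions are known, which is what MemberOffsetRefs records. A sketch of the resulting layouts, with 32-bit fields:

  GNU ("/" member, fields big-endian):
    [NumSyms] [MemberOffset x NumSyms] [Name\0 Name\0 ...]

  BSD ("__.SYMDEF" member, fields little-endian):
    [RanlibBytes = NumSyms * 8]
    [(NameOffset, MemberOffset) x NumSyms]
    [StringTableBytes] [Name\0 Name\0 ...]

  Both are then zero-padded so the next member header starts 4-byte aligned,
  which ld64 requires.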
+ Out.seek(BodyStartOffset); + unsigned NumSyms = MemberOffsetRefs.size(); + if (Kind == object::Archive::K_GNU) + print32(Out, Kind, NumSyms); + else + print32(Out, Kind, NumSyms * 8); + Out.seek(Pos); - return StartOffset + 4; + return BodyStartOffset + 4; } -std::pair<StringRef, std::error_code> -llvm::writeArchive(StringRef ArcName, - std::vector<NewArchiveIterator> &NewMembers, - bool WriteSymtab) { +std::pair<StringRef, std::error_code> llvm::writeArchive( + StringRef ArcName, std::vector<NewArchiveIterator> &NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic) { SmallString<128> TmpArchive; int TmpArchiveFD; if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", @@ -267,56 +321,60 @@ llvm::writeArchive(StringRef ArcName, unsigned MemberReferenceOffset = 0; if (WriteSymtab) { - ErrorOr<unsigned> MemberReferenceOffsetOrErr = - writeSymbolTable(Out, NewMembers, Members, MemberOffsetRefs); + ErrorOr<unsigned> MemberReferenceOffsetOrErr = writeSymbolTable( + Out, Kind, NewMembers, Members, MemberOffsetRefs, Deterministic); if (auto EC = MemberReferenceOffsetOrErr.getError()) return std::make_pair(ArcName, EC); MemberReferenceOffset = MemberReferenceOffsetOrErr.get(); } std::vector<unsigned> StringMapIndexes; - writeStringTable(Out, NewMembers, StringMapIndexes); + if (Kind != object::Archive::K_BSD) + writeStringTable(Out, NewMembers, StringMapIndexes); unsigned MemberNum = 0; - unsigned LongNameMemberNum = 0; unsigned NewMemberNum = 0; + std::vector<unsigned>::iterator StringMapIndexIter = StringMapIndexes.begin(); std::vector<unsigned> MemberOffset; - for (std::vector<NewArchiveIterator>::iterator I = NewMembers.begin(), - E = NewMembers.end(); - I != E; ++I, ++MemberNum) { + for (const NewArchiveIterator &I : NewMembers) { + MemoryBufferRef File = Members[MemberNum++]; unsigned Pos = Out.tell(); MemberOffset.push_back(Pos); - MemoryBufferRef File = Members[MemberNum]; - if (I->isNewMember()) { - StringRef FileName = I->getNew(); + sys::TimeValue ModTime; + unsigned UID; + unsigned GID; + unsigned Perms; + if (Deterministic) { + ModTime.fromEpochTime(0); + UID = 0; + GID = 0; + Perms = 0644; + } else if (I.isNewMember()) { const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum]; - NewMemberNum++; - - StringRef Name = sys::path::filename(FileName); - if (Name.size() < 16) - printMemberHeader(Out, Name, Status.getLastModificationTime(), - Status.getUser(), Status.getGroup(), - Status.permissions(), Status.getSize()); - else - printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++], - Status.getLastModificationTime(), Status.getUser(), - Status.getGroup(), Status.permissions(), - Status.getSize()); + ModTime = Status.getLastModificationTime(); + UID = Status.getUser(); + GID = Status.getGroup(); + Perms = Status.permissions(); } else { - object::Archive::child_iterator OldMember = I->getOld(); - StringRef Name = I->getName(); + object::Archive::child_iterator OldMember = I.getOld(); + ModTime = OldMember->getLastModified(); + UID = OldMember->getUID(); + GID = OldMember->getGID(); + Perms = OldMember->getAccessMode(); + } - if (Name.size() < 16) - printMemberHeader(Out, Name, OldMember->getLastModified(), - OldMember->getUID(), OldMember->getGID(), - OldMember->getAccessMode(), OldMember->getSize()); - else - printMemberHeader(Out, StringMapIndexes[LongNameMemberNum++], - OldMember->getLastModified(), OldMember->getUID(), - OldMember->getGID(), OldMember->getAccessMode(), - OldMember->getSize()); + if (I.isNewMember()) { + 
StringRef FileName = I.getNew(); + const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum++]; + printMemberHeader(Out, Kind, sys::path::filename(FileName), + StringMapIndexIter, ModTime, UID, GID, Perms, + Status.getSize()); + } else { + object::Archive::child_iterator OldMember = I.getOld(); + printMemberHeader(Out, Kind, I.getName(), StringMapIndexIter, ModTime, + UID, GID, Perms, OldMember->getSize()); } Out << File.getBuffer(); @@ -327,8 +385,11 @@ llvm::writeArchive(StringRef ArcName, if (MemberReferenceOffset) { Out.seek(MemberReferenceOffset); - for (unsigned MemberNum : MemberOffsetRefs) - print32BE(Out, MemberOffset[MemberNum]); + for (unsigned MemberNum : MemberOffsetRefs) { + if (Kind == object::Archive::K_BSD) + Out.seek(Out.tell() + 4); // skip over the string offset + print32(Out, Kind, MemberOffset[MemberNum]); + } } Output.keep(); diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 64bb0d5c636d..bcca9839b475 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -154,30 +154,24 @@ ErrorOr<StringRef> COFFObjectFile::getSymbolName(DataRefImpl Ref) const { return Result; } -uint64_t COFFObjectFile::getSymbolValue(DataRefImpl Ref) const { - COFFSymbolRef Sym = getCOFFSymbol(Ref); - - if (Sym.isAnyUndefined() || Sym.isCommon()) - return UnknownAddress; - - return Sym.getValue(); +uint64_t COFFObjectFile::getSymbolValueImpl(DataRefImpl Ref) const { + return getCOFFSymbol(Ref).getValue(); } -std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref, - uint64_t &Result) const { - Result = getSymbolValue(Ref); +ErrorOr<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const { + uint64_t Result = getSymbolValue(Ref); COFFSymbolRef Symb = getCOFFSymbol(Ref); int32_t SectionNumber = Symb.getSectionNumber(); if (Symb.isAnyUndefined() || Symb.isCommon() || COFF::isReservedSectionNumber(SectionNumber)) - return std::error_code(); + return Result; const coff_section *Section = nullptr; if (std::error_code EC = getSection(SectionNumber, Section)) return EC; Result += Section->VirtualAddress; - return std::error_code(); + return Result; } SymbolRef::Type COFFObjectFile::getSymbolType(DataRefImpl Ref) const { @@ -362,6 +356,8 @@ getFirstReloc(const coff_section *Sec, MemoryBufferRef M, const uint8_t *Base) { relocation_iterator COFFObjectFile::section_rel_begin(DataRefImpl Ref) const { const coff_section *Sec = toSec(Ref); const coff_relocation *begin = getFirstReloc(Sec, Data, base()); + if (begin && Sec->VirtualAddress != 0) + report_fatal_error("Sections with relocations should have an address of 0"); DataRefImpl Ret; Ret.p = reinterpret_cast<uintptr_t>(begin); return relocation_iterator(RelocationRef(Ret, this)); @@ -919,19 +915,15 @@ uint64_t COFFObjectFile::getSectionSize(const coff_section *Sec) const { // whether or not we have an executable image. // // For object files, SizeOfRawData contains the size of section's data; - // VirtualSize is always zero. + // VirtualSize should be zero but isn't due to buggy COFF writers. // // For executables, SizeOfRawData *must* be a multiple of FileAlignment; the // actual section size is in VirtualSize. It is possible for VirtualSize to // be greater than SizeOfRawData; the contents past that point should be // considered to be zero. 
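The getSectionSize change that follows reduces to one rule: for an image (the file has a DOS header) take the smaller of VirtualSize and SizeOfRawData; for a relocatable object trust SizeOfRawData, since object-file VirtualSize cannot be relied on. A condensed, hypothetical restatement of that rule:

#include <algorithm>
#include <cstdint>

uint32_t coffSectionSize(bool IsImage, uint32_t VirtualSize, uint32_t SizeOfRawData) {
  return IsImage ? std::min(VirtualSize, SizeOfRawData) : SizeOfRawData;
}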
- uint32_t SectionSize; - if (Sec->VirtualSize) - SectionSize = std::min(Sec->VirtualSize, Sec->SizeOfRawData); - else - SectionSize = Sec->SizeOfRawData; - - return SectionSize; + if (getDOSHeader()) + return std::min(Sec->VirtualSize, Sec->SizeOfRawData); + return Sec->SizeOfRawData; } std::error_code @@ -961,10 +953,6 @@ void COFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const { reinterpret_cast<const coff_relocation*>(Rel.p) + 1); } -ErrorOr<uint64_t> COFFObjectFile::getRelocationAddress(DataRefImpl Rel) const { - report_fatal_error("getRelocationAddress not implemented in COFFObjectFile"); -} - uint64_t COFFObjectFile::getRelocationOffset(DataRefImpl Rel) const { const coff_relocation *R = toRel(Rel); return R->VirtualAddress; diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp index ecdd468305be..72c232c32870 100644 --- a/lib/Object/ELFYAML.cpp +++ b/lib/Object/ELFYAML.cpp @@ -627,6 +627,11 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) { IO.mapOptional("Size", Section.Size, Hex64(Section.Content.binary_size())); } +static void sectionMapping(IO &IO, ELFYAML::NoBitsSection &Section) { + commonSectionMapping(IO, Section); + IO.mapOptional("Size", Section.Size, Hex64(0)); +} + static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) { commonSectionMapping(IO, Section); IO.mapOptional("Relocations", Section.Relocations); @@ -682,6 +687,11 @@ void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping( Section.reset(new ELFYAML::Group()); groupSectionMapping(IO, *cast<ELFYAML::Group>(Section.get())); break; + case ELF::SHT_NOBITS: + if (!IO.outputting()) + Section.reset(new ELFYAML::NoBitsSection()); + sectionMapping(IO, *cast<ELFYAML::NoBitsSection>(Section.get())); + break; case ELF::SHT_MIPS_ABIFLAGS: if (!IO.outputting()) Section.reset(new ELFYAML::MipsABIFlags()); diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 4255ed717fb9..05900630c75c 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -368,18 +368,12 @@ std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb, return std::error_code(); } -uint64_t MachOObjectFile::getSymbolValue(DataRefImpl Sym) const { - uint64_t NValue = getNValue(Sym); - MachO::nlist_base Entry = getSymbolTableEntryBase(this, Sym); - if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) - return UnknownAddress; - return NValue; +uint64_t MachOObjectFile::getSymbolValueImpl(DataRefImpl Sym) const { + return getNValue(Sym); } -std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Sym, - uint64_t &Res) const { - Res = getSymbolValue(Sym); - return std::error_code(); +ErrorOr<uint64_t> MachOObjectFile::getSymbolAddress(DataRefImpl Sym) const { + return getSymbolValue(Sym); } uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const { @@ -392,9 +386,7 @@ uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const { } uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const { - uint64_t Value; - getSymbolAddress(DRI, Value); - return Value; + return getNValue(DRI); } SymbolRef::Type MachOObjectFile::getSymbolType(DataRefImpl Symb) const { @@ -422,9 +414,6 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { uint32_t Result = SymbolRef::SF_None; - if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) - Result |= SymbolRef::SF_Undefined; - if ((MachOType & MachO::N_TYPE) == MachO::N_INDR) Result |= SymbolRef::SF_Indirect; @@ -434,10 +423,10 @@ uint32_t 
MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { if (MachOType & MachO::N_EXT) { Result |= SymbolRef::SF_Global; if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) { - uint64_t Value; - getSymbolAddress(DRI, Value); - if (Value && Value != UnknownAddress) + if (getNValue(DRI)) Result |= SymbolRef::SF_Common; + else + Result |= SymbolRef::SF_Undefined; } if (!(MachOType & MachO::N_PEXT)) @@ -593,15 +582,6 @@ void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { ++Rel.d.b; } -ErrorOr<uint64_t> MachOObjectFile::getRelocationAddress(DataRefImpl Rel) const { - uint64_t Offset = getRelocationOffset(Rel); - - DataRefImpl Sec; - Sec.d.a = Rel.d.a; - uint64_t SecAddress = getSectionAddress(Sec); - return SecAddress + Offset; -} - uint64_t MachOObjectFile::getRelocationOffset(DataRefImpl Rel) const { assert(getHeader().filetype == MachO::MH_OBJECT && "Only implemented for MH_OBJECT"); @@ -932,6 +912,13 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, return std::error_code(); } +section_iterator +MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const { + DataRefImpl Sec; + Sec.d.a = Rel->getRawDataRefImpl().d.a; + return section_iterator(SectionRef(Sec, this)); +} + basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const { return getSymbolByIndex(0); } diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 945252b21046..5c4b7a67b2ad 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -180,10 +180,10 @@ const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) { } uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) { - uint64_t ret; - if (std::error_code ec = (*unwrap(SI))->getAddress(ret)) - report_fatal_error(ec.message()); - return ret; + ErrorOr<uint64_t> Ret = (*unwrap(SI))->getAddress(); + if (std::error_code EC = Ret.getError()) + report_fatal_error(EC.message()); + return *Ret; } uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { @@ -191,13 +191,6 @@ uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { } // RelocationRef accessors -uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) { - ErrorOr<uint64_t> Ret = (*unwrap(RI))->getAddress(); - if (std::error_code EC = Ret.getError()) - report_fatal_error(EC.message()); - return *Ret; -} - uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) { return (*unwrap(RI))->getOffset(); } diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 04e4916f94ef..f82edae89bc6 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -35,6 +35,15 @@ bool SectionRef::containsSymbol(SymbolRef S) const { return *this == *SymSec; } +uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const { + uint32_t Flags = getSymbolFlags(Ref); + if (Flags & SymbolRef::SF_Undefined) + return 0; + if (Flags & SymbolRef::SF_Common) + return getCommonSymbolSize(Ref); + return getSymbolValueImpl(Ref); +} + std::error_code ObjectFile::printSymbolName(raw_ostream &OS, DataRefImpl Symb) const { ErrorOr<StringRef> Name = getSymbolName(Symb); diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 4b0a0e5d4819..5d31225396d4 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -52,14 +52,17 @@ namespace llvm { /* Number of bits in the significand. This includes the integer bit. */ unsigned int precision; + + /* Number of bits actually used in the semantics. 
*/ + unsigned int sizeInBits; }; - const fltSemantics APFloat::IEEEhalf = { 15, -14, 11 }; - const fltSemantics APFloat::IEEEsingle = { 127, -126, 24 }; - const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53 }; - const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113 }; - const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64 }; - const fltSemantics APFloat::Bogus = { 0, 0, 0 }; + const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 }; + const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 }; + const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 }; + const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 }; + const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 }; + const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 }; /* The PowerPC format consists of two doubles. It does not map cleanly onto the usual format above. It is approximated using twice the @@ -72,7 +75,7 @@ namespace llvm { to represent all possible values held by a PPC double-double number, for example: (long double) 1.0 + (long double) 0x1p-106 Should this be replaced by a full emulation of PPC double-double? */ - const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53 }; + const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 }; /* A tight upper bound on number of parts required to hold the value pow(5, power) is @@ -2416,7 +2419,7 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, roundingMode rounding_mode) { unsigned int parts, pow5PartCount; - fltSemantics calcSemantics = { 32767, -32767, 0 }; + fltSemantics calcSemantics = { 32767, -32767, 0, 0 }; integerPart pow5Parts[maxPowerOfFiveParts]; bool isNearest; @@ -3368,6 +3371,10 @@ APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) } } +unsigned APFloat::getSizeInBits(const fltSemantics &Sem) { + return Sem.sizeInBits; +} + /// Make this number the largest magnitude normal number in the given /// semantics. 
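A possible use of the new sizeInBits field through the accessor added above; the expected values follow the updated table of semantics initializers:

#include "llvm/ADT/APFloat.h"

void floatWidths() {
  unsigned HalfBits = llvm::APFloat::getSizeInBits(llvm::APFloat::IEEEhalf); // 16
  unsigned QuadBits = llvm::APFloat::getSizeInBits(llvm::APFloat::IEEEquad); // 128
  (void)HalfBits;
  (void)QuadBits;
}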
void APFloat::makeLargest(bool Negative) { diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index dcaacf6248d1..17fba95ebb2b 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -46,21 +46,21 @@ using namespace cl; // namespace llvm { namespace cl { -TEMPLATE_INSTANTIATION(class basic_parser<bool>); -TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>); -TEMPLATE_INSTANTIATION(class basic_parser<int>); -TEMPLATE_INSTANTIATION(class basic_parser<unsigned>); -TEMPLATE_INSTANTIATION(class basic_parser<unsigned long long>); -TEMPLATE_INSTANTIATION(class basic_parser<double>); -TEMPLATE_INSTANTIATION(class basic_parser<float>); -TEMPLATE_INSTANTIATION(class basic_parser<std::string>); -TEMPLATE_INSTANTIATION(class basic_parser<char>); - -TEMPLATE_INSTANTIATION(class opt<unsigned>); -TEMPLATE_INSTANTIATION(class opt<int>); -TEMPLATE_INSTANTIATION(class opt<std::string>); -TEMPLATE_INSTANTIATION(class opt<char>); -TEMPLATE_INSTANTIATION(class opt<bool>); +template class basic_parser<bool>; +template class basic_parser<boolOrDefault>; +template class basic_parser<int>; +template class basic_parser<unsigned>; +template class basic_parser<unsigned long long>; +template class basic_parser<double>; +template class basic_parser<float>; +template class basic_parser<std::string>; +template class basic_parser<char>; + +template class opt<unsigned>; +template class opt<int>; +template class opt<std::string>; +template class opt<char>; +template class opt<bool>; } } // end namespace llvm::cl diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 92be0e047f62..c6646fb101b7 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -1165,6 +1165,122 @@ Triple Triple::get64BitArchVariant() const { return T; } +Triple Triple::getBigEndianArchVariant() const { + Triple T(*this); + switch (getArch()) { + case Triple::UnknownArch: + case Triple::amdgcn: + case Triple::amdil64: + case Triple::amdil: + case Triple::hexagon: + case Triple::hsail64: + case Triple::hsail: + case Triple::kalimba: + case Triple::le32: + case Triple::le64: + case Triple::msp430: + case Triple::nvptx64: + case Triple::nvptx: + case Triple::r600: + case Triple::shave: + case Triple::spir64: + case Triple::spir: + case Triple::wasm32: + case Triple::wasm64: + case Triple::x86: + case Triple::x86_64: + case Triple::xcore: + + // ARM is intentionally unsupported here, changing the architecture would + // drop any arch suffixes. + case Triple::arm: + case Triple::thumb: + T.setArch(UnknownArch); + break; + + case Triple::aarch64_be: + case Triple::armeb: + case Triple::bpfeb: + case Triple::mips64: + case Triple::mips: + case Triple::ppc64: + case Triple::ppc: + case Triple::sparc: + case Triple::sparcv9: + case Triple::systemz: + case Triple::tce: + case Triple::thumbeb: + // Already big endian. 
+ break; + + case Triple::aarch64: T.setArch(Triple::aarch64_be); break; + case Triple::bpfel: T.setArch(Triple::bpfeb); break; + case Triple::mips64el:T.setArch(Triple::mips64); break; + case Triple::mipsel: T.setArch(Triple::mips); break; + case Triple::ppc64le: T.setArch(Triple::ppc64); break; + case Triple::sparcel: T.setArch(Triple::sparc); break; + } + return T; +} + +Triple Triple::getLittleEndianArchVariant() const { + Triple T(*this); + switch (getArch()) { + case Triple::UnknownArch: + case Triple::ppc: + case Triple::sparcv9: + case Triple::systemz: + case Triple::tce: + + // ARM is intentionally unsupported here, changing the architecture would + // drop any arch suffixes. + case Triple::armeb: + case Triple::thumbeb: + T.setArch(UnknownArch); + break; + + case Triple::aarch64: + case Triple::amdgcn: + case Triple::amdil64: + case Triple::amdil: + case Triple::arm: + case Triple::bpfel: + case Triple::hexagon: + case Triple::hsail64: + case Triple::hsail: + case Triple::kalimba: + case Triple::le32: + case Triple::le64: + case Triple::mips64el: + case Triple::mipsel: + case Triple::msp430: + case Triple::nvptx64: + case Triple::nvptx: + case Triple::ppc64le: + case Triple::r600: + case Triple::shave: + case Triple::sparcel: + case Triple::spir64: + case Triple::spir: + case Triple::thumb: + case Triple::wasm32: + case Triple::wasm64: + case Triple::x86: + case Triple::x86_64: + case Triple::xcore: + // Already little endian. + break; + + case Triple::aarch64_be: T.setArch(Triple::aarch64); break; + case Triple::bpfeb: T.setArch(Triple::bpfel); break; + case Triple::mips64: T.setArch(Triple::mips64el); break; + case Triple::mips: T.setArch(Triple::mipsel); break; + case Triple::ppc64: T.setArch(Triple::ppc64le); break; + case Triple::sparc: T.setArch(Triple::sparcel); break; + } + return T; +} + const char *Triple::getARMCPUForArch(StringRef MArch) const { if (MArch.empty()) MArch = getArchName(); diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 6e982bf1da19..c9a31b64cfd3 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -1648,7 +1648,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) { } OS << " {"; - const std::vector<Record*> &SC = R.getSuperClasses(); + ArrayRef<Record *> SC = R.getSuperClasses(); if (!SC.empty()) { OS << "\t//"; for (const Record *Super : SC) diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp index 92f5b2dd7172..07c538159dcb 100644 --- a/lib/TableGen/SetTheory.cpp +++ b/lib/TableGen/SetTheory.cpp @@ -302,7 +302,7 @@ const RecVec *SetTheory::expand(Record *Set) { return &I->second; // This is the first time we see Set. Find a suitable expander. - const std::vector<Record*> &SC = Set->getSuperClasses(); + ArrayRef<Record *> SC = Set->getSuperClasses(); for (unsigned i = 0, e = SC.size(); i != e; ++i) { // Skip unnamed superclasses. if (!dyn_cast<StringInit>(SC[i]->getNameInit())) diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 15df25aea50e..5c36fda2e1ca 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -184,7 +184,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { // Since everything went well, we can now set the "superclass" list for the // current record. 
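A possible use of the Triple::getBigEndianArchVariant / getLittleEndianArchVariant helpers introduced above (the triple string is illustrative):

#include "llvm/ADT/Triple.h"

void flipEndianness() {
  llvm::Triple T("mipsel-unknown-linux-gnu");
  llvm::Triple BE = T.getBigEndianArchVariant();     // arch becomes mips
  llvm::Triple LE = BE.getLittleEndianArchVariant(); // and back to mipsel
  (void)LE;
}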
- const std::vector<Record*> &SCs = SC->getSuperClasses(); + ArrayRef<Record *> SCs = SC->getSuperClasses(); ArrayRef<SMRange> SCRanges = SC->getSuperClassRanges(); for (unsigned i = 0, e = SCs.size(); i != e; ++i) { if (CurRec->isSubClassOf(SCs[i])) diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index bffd9e6e8c76..79a84ad8c6c5 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -510,9 +510,17 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C, if (J.isRegMask()) AvailableRegs.clearBitsNotInMask(J.getRegMask()); - if (J.isReg() && J.isDef() && AvailableRegs[J.getReg()]) { - assert(J.isDead() && "Non-dead def should have been removed by now!"); - AvailableRegs.reset(J.getReg()); + if (J.isReg() && J.isDef()) { + MCRegAliasIterator AI(J.getReg(), TRI, /*IncludeSelf=*/true); + if (J.isDead()) + for (; AI.isValid(); ++AI) + AvailableRegs.reset(*AI); +#ifndef NDEBUG + else + for (; AI.isValid(); ++AI) + assert(!AvailableRegs[*AI] && + "Non-dead def should have been removed by now!"); +#endif } } } @@ -585,7 +593,6 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C, if (Change) { Substs[MO.getReg()] = Reg; MO.setReg(Reg); - MRI->setPhysRegUsed(Reg); Changed = true; } diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index 4691e949838d..815ebef177d8 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -40,6 +40,11 @@ def CC_AArch64_AAPCS : CallingConv<[ // slot is 64-bit. CCIfByVal<CCPassByVal<8, 8>>, + // The 'nest' parameter, if any, is passed in X18. + // Darwin uses X18 as the platform register and hence 'nest' isn't currently + // supported there. + CCIfNest<CCAssignToReg<[X18]>>, + CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>, // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index c19fcdc4bb18..072819836bb3 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -310,7 +310,7 @@ CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { } unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { - assert(TLI.getValueType(AI->getType(), true) == MVT::i64 && + assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && "Alloca should always return a pointer."); // Don't handle dynamic allocas. @@ -420,7 +420,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); - EVT DestEVT = TLI.getValueType(GV->getType(), true); + EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); if (!DestEVT.isSimple()) return 0; @@ -459,7 +459,7 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { } unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) @@ -538,13 +538,14 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) } case Instruction::IntToPtr: { // Look past no-op inttoptrs. 
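The repeated AArch64FastISel edits around this point all follow one API migration: TargetLowering's type queries now take the module's DataLayout explicitly instead of fetching it internally. The pattern, where TLI and DL stand for the lowering object and DataLayout already available in FastISel:

  EVT VT  = TLI.getValueType(DL, Ty);   // was: TLI.getValueType(Ty)
  MVT PTy = TLI.getPointerTy(DL);       // was: TLI.getPointerTy()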
- if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return computeAddress(U->getOperand(0), Addr, Ty); break; } case Instruction::PtrToInt: { // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeAddress(U->getOperand(0), Addr, Ty); break; } @@ -879,13 +880,13 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. if (InMBB && - TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. - if (InMBB && - TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; } @@ -906,7 +907,7 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); + EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) @@ -1390,7 +1391,7 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { Type *Ty = LHS->getType(); - EVT EVT = TLI.getValueType(Ty, true); + EVT EVT = TLI.getValueType(DL, Ty, true); if (!EVT.isSimple()) return false; MVT VT = EVT.getSimpleVT(); @@ -2761,7 +2762,7 @@ bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { if (SrcReg == 0) return false; - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); if (SrcVT == MVT::f128) return false; @@ -2797,7 +2798,7 @@ bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { return false; bool SrcIsKill = hasTrivialKill(I->getOperand(0)); - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); // Handle sign-extension. if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { @@ -2856,7 +2857,7 @@ bool AArch64FastISel::fastLowerArguments() { if (ArgTy->isStructTy() || ArgTy->isArrayTy()) return false; - EVT ArgVT = TLI.getValueType(ArgTy); + EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; @@ -2898,7 +2899,7 @@ bool AArch64FastISel::fastLowerArguments() { unsigned GPRIdx = 0; unsigned FPRIdx = 0; for (auto const &Arg : F->args()) { - MVT VT = TLI.getSimpleValueType(Arg.getType()); + MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); unsigned SrcReg; const TargetRegisterClass *RC; if (VT >= MVT::i1 && VT <= MVT::i32) { @@ -3689,7 +3690,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { CallingConv::ID CC = F.getCallingConv(); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ValLocs; @@ -3724,7 +3725,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; @@ -3772,8 +3773,8 @@ bool AArch64FastISel::selectTrunc(const Instruction *I) { Value *Op = I->getOperand(0); Type *SrcTy = Op->getType(); - EVT SrcEVT = TLI.getValueType(SrcTy, true); - EVT DestEVT = TLI.getValueType(DestTy, true); + EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); + EVT DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) @@ -4459,7 +4460,7 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) { } bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { - EVT DestEVT = TLI.getValueType(I->getType(), true); + EVT DestEVT = TLI.getValueType(DL, I->getType(), true); if (!DestEVT.isSimple()) return false; @@ -4825,7 +4826,7 @@ std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { bool IdxNIsKill = hasTrivialKill(Idx); // If the index is smaller or larger than intptr_t, truncate or extend it. - MVT PtrVT = TLI.getPointerTy(); + MVT PtrVT = TLI.getPointerTy(DL); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*IsZExt=*/false); @@ -4849,7 +4850,7 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { // into a single N = N + TotalOffset. uint64_t TotalOffs = 0; Type *Ty = I->getOperand(0)->getType(); - MVT VT = TLI.getPointerTy(); + MVT VT = TLI.getPointerTy(DL); for (auto OI = std::next(I->op_begin()), E = I->op_end(); OI != E; ++OI) { const Value *Idx = *OI; if (auto *StTy = dyn_cast<StructType>(Ty)) { diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 3ba7e70a102d..a7817f4f67dd 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -349,12 +349,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Allocate space for the rest of the frame. const unsigned Alignment = MFI->getMaxAlignment(); - const bool NeedsRealignment = (Alignment > 16); + const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); unsigned scratchSPReg = AArch64::SP; - if (NeedsRealignment) { - // Use the first callee-saved register as a scratch register - assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) && - "No scratch register to align SP!"); + if (NumBytes && NeedsRealignment) { + // Use the first callee-saved register as a scratch register. 
scratchSPReg = AArch64::X9; } @@ -366,9 +364,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, MachineInstr::FrameSetup); - assert(!(NeedsRealignment && NumBytes==0) && - "NumBytes should never be 0 when realignment is needed"); - if (NumBytes && NeedsRealignment) { const unsigned NrBitsToZero = countTrailingZeros(Alignment); assert(NrBitsToZero > 1); @@ -881,28 +876,34 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( return true; } -void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( - MachineFunction &MF, RegScavenger *RS) const { +void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + // All calls are tail calls in GHC calling conv, and functions have no + // prologue/epilogue. + if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + return; + + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( MF.getSubtarget().getRegisterInfo()); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); - MachineRegisterInfo *MRI = &MF.getRegInfo(); SmallVector<unsigned, 4> UnspilledCSGPRs; SmallVector<unsigned, 4> UnspilledCSFPRs; // The frame record needs to be created by saving the appropriate registers if (hasFP(MF)) { - MRI->setPhysRegUsed(AArch64::FP); - MRI->setPhysRegUsed(AArch64::LR); + SavedRegs.set(AArch64::FP); + SavedRegs.set(AArch64::LR); } // Spill the BasePtr if it's used. Do this first thing so that the // getCalleeSavedRegs() below will get the right answer. if (RegInfo->hasBasePointer(MF)) - MRI->setPhysRegUsed(RegInfo->getBaseRegister()); + SavedRegs.set(RegInfo->getBaseRegister()); if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) - MRI->setPhysRegUsed(AArch64::X9); + SavedRegs.set(AArch64::X9); // If any callee-saved registers are used, the frame cannot be eliminated. unsigned NumGPRSpilled = 0; @@ -924,8 +925,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( AArch64::FPR64RegClass.contains(EvenReg)) && "Register class mismatch!"); - const bool OddRegUsed = MRI->isPhysRegUsed(OddReg); - const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg); + const bool OddRegUsed = SavedRegs.test(OddReg); + const bool EvenRegUsed = SavedRegs.test(EvenReg); // Early exit if none of the registers in the register pair is actually // used. @@ -946,7 +947,7 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( if (OddRegUsed ^ EvenRegUsed) { // Find out which register is the additional spill. Reg = OddRegUsed ? 
EvenReg : OddReg; - MRI->setPhysRegUsed(Reg); + SavedRegs.set(Reg); } DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); @@ -1001,7 +1002,7 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan( UnspilledCSGPRs.pop_back(); DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) << " to get a scratch register.\n"); - MRI->setPhysRegUsed(Reg); + SavedRegs.set(Reg); ExtraCSSpill = true; ++Count; } diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index b496fccba349..731f031ff855 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -59,8 +59,8 @@ public: bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; }; } // End llvm namespace diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 1ea4abcf05fa..772e894f4f0a 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -610,10 +610,11 @@ static bool isWorthFoldingADDlow(SDValue N) { bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm) { SDLoc dl(N); + const DataLayout &DL = CurDAG->getDataLayout(); const TargetLowering *TLI = getTargetLowering(); if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); return true; } @@ -628,10 +629,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, const GlobalValue *GV = GAN->getGlobal(); unsigned Alignment = GV->getAlignment(); - const DataLayout *DL = TLI->getDataLayout(); Type *Ty = GV->getType()->getElementType(); if (Alignment == 0 && Ty->isSized()) - Alignment = DL->getABITypeAlignment(Ty); + Alignment = DL.getABITypeAlignment(Ty); if (Alignment >= Size) return true; @@ -645,7 +645,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); } OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); return true; @@ -688,7 +688,8 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); const TargetLowering *TLI = getTargetLowering(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); return true; @@ -1494,7 +1495,7 @@ static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, } static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, - unsigned &LSB, unsigned &MSB, + unsigned &Immr, unsigned &Imms, bool BiggerPattern) { assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && "N must be a SHR/SRA operation to call this function"); @@ -1508,7 
+1509,7 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, "Type checking must have been done before calling this function"); // Check for AND + SRL doing several bits extract. - if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) + if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) return true; // we're looking for a shift of a shift @@ -1548,13 +1549,9 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && "bad amount in shift node!"); - // Note: The width operand is encoded as width-1. - unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1; - int sLSB = Srl_imm - Shl_imm; - if (sLSB < 0) - return false; - LSB = sLSB; - MSB = LSB + Width; + int immr = Srl_imm - Shl_imm; + Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; + Imms = VT.getSizeInBits() - Shl_imm - Trunc_bits - 1; // SRA requires a signed extraction if (VT == MVT::i32) Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; @@ -1564,7 +1561,7 @@ static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, } static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, - SDValue &Opd0, unsigned &LSB, unsigned &MSB, + SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits = 0, bool BiggerPattern = false) { if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) @@ -1576,11 +1573,11 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, return false; break; case ISD::AND: - return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB, + return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, NumberOfIgnoredLowBits, BiggerPattern); case ISD::SRL: case ISD::SRA: - return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern); + return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); } unsigned NOpc = N->getMachineOpcode(); @@ -1593,8 +1590,8 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, case AArch64::UBFMXri: Opc = NOpc; Opd0 = N->getOperand(0); - LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); - MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); + Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); + Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); return true; } // Unreachable @@ -1602,9 +1599,9 @@ static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, } SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { - unsigned Opc, LSB, MSB; + unsigned Opc, Immr, Imms; SDValue Opd0; - if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) + if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) return nullptr; EVT VT = N->getValueType(0); @@ -1613,8 +1610,8 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { // If the bit extract operation is 64bit but the original type is 32bit, we // need to add one EXTRACT_SUBREG. 
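The bitfield-extract hunk above stops computing an LSB/MSB pair (where the width operand was encoded as width-1) and instead computes the SBFM/UBFM immr/imms fields directly: immr is the right-shift amount minus the left-shift amount, wrapped by the register width when negative, and imms is the index of the highest bit kept. A small arithmetic sketch of that computation for the Trunc_bits == 0 case, in plain C++ without LLVM types:

```cpp
// Arithmetic sketch of the new immr/imms encoding (Trunc_bits == 0 case).
#include <cassert>

struct BitfieldImm { unsigned Immr, Imms; };

BitfieldImm computeBitfieldImm(unsigned Width, unsigned ShlImm, unsigned SrlImm) {
  int immr = static_cast<int>(SrlImm) - static_cast<int>(ShlImm);
  // A negative rotate amount wraps around the register width.
  unsigned Immr = immr < 0 ? static_cast<unsigned>(immr + static_cast<int>(Width))
                           : static_cast<unsigned>(immr);
  // imms encodes the highest bit kept, not LSB + (width - 1) as before.
  unsigned Imms = Width - ShlImm - 1;
  return {Immr, Imms};
}

int main() {
  // (x << 8) >> 4 on a 32-bit value: immr = (4 - 8) mod 32 = 28, imms = 23.
  BitfieldImm I = computeBitfieldImm(32, 8, 4);
  assert(I.Immr == 28 && I.Imms == 23);
}
```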
if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { - SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, MVT::i64), - CurDAG->getTargetConstant(MSB, dl, MVT::i64)}; + SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), + CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); @@ -1624,8 +1621,8 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { return Node; } - SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, dl, VT), - CurDAG->getTargetConstant(MSB, dl, VT)}; + SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), + CurDAG->getTargetConstant(Imms, dl, VT)}; return CurDAG->SelectNodeTo(N, Opc, VT, Ops); } @@ -2351,7 +2348,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { int FI = cast<FrameIndexSDNode>(Node)->getIndex(); unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); const TargetLowering *TLI = getTargetLowering(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); SDLoc DL(Node); SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index f3242cdd971d..3e8f46cf1ecd 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -705,7 +705,8 @@ void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { addTypeForNEON(VT, MVT::v4i32); } -EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -774,7 +775,8 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( } } -MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { +MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, + EVT) const { return MVT::i64; } @@ -1710,7 +1712,8 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; - SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + SDValue Callee = + DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout())); StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); TargetLowering::CallLoweringInfo CLI(DAG); @@ -2089,7 +2092,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( CurArgIdx = Ins[i].getOrigArgIndex(); // Get type of the original argument. - EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true); + EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(), + /*AllowUnknown*/ true); MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other; // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) @@ -2111,7 +2115,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( if (Ins[i].Flags.isByVal()) { // Byval is used for HFAs in the PCS, but the system should work in a // non-compliant manner for larger structs. 
- EVT PtrTy = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); int Size = Ins[i].Flags.getByValSize(); unsigned NumRegs = (Size + 7) / 8; @@ -2119,7 +2123,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // case. It should also work for fundamental types too. unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); - SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); + SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT); InVals.push_back(FrameIdxN); continue; @@ -2186,7 +2190,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true); // Create load nodes to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue ArgValue; // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) @@ -2265,6 +2269,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); SmallVector<SDValue, 8> MemOps; @@ -2279,7 +2284,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, if (GPRSaveSize != 0) { GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); - SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT); for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass); @@ -2288,8 +2293,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, DAG.getStore(Val.getValue(1), DL, Val, FIN, MachinePointerInfo::getStack(i * 8), false, false, 0); MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(8, DL, getPointerTy())); + FIN = + DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT)); } } FuncInfo->setVarArgsGPRIndex(GPRIdx); @@ -2307,7 +2312,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, if (FPRSaveSize != 0) { FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT); for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass); @@ -2317,8 +2322,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, DAG.getStore(Val.getValue(1), DL, Val, FIN, MachinePointerInfo::getStack(i * 16), false, false, 0); MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(16, DL, getPointerTy())); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, + DAG.getConstant(16, DL, PtrVT)); } } FuncInfo->setVarArgsFPRIndex(FPRIdx); @@ -2614,7 +2619,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, for (unsigned i = 0; i != NumArgs; ++i) { MVT ValVT = Outs[i].VT; // Get type of the original argument. - EVT ActualVT = getValueType(CLI.getArgs()[Outs[i].OrigArgIndex].Ty, + EVT ActualVT = getValueType(DAG.getDataLayout(), + CLI.getArgs()[Outs[i].OrigArgIndex].Ty, /*AllowUnknown*/ true); MVT ActualMVT = ActualVT.isSimple() ? 
ActualVT.getSimpleVT() : ValVT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; @@ -2674,10 +2680,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, true), DL); - SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy()); + SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, + getPointerTy(DAG.getDataLayout())); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; SmallVector<SDValue, 8> MemOpChains; + auto PtrVT = getPointerTy(DAG.getDataLayout()); // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; @@ -2743,13 +2751,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, unsigned LocMemOffset = VA.getLocMemOffset(); int32_t Offset = LocMemOffset + BEAlign; SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); - PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); if (IsTailCall) { Offset = Offset + FPDiff; int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); - DstAddr = DAG.getFrameIndex(FI, getPointerTy()); + DstAddr = DAG.getFrameIndex(FI, PtrVT); DstInfo = MachinePointerInfo::getFixedStack(FI); // Make sure any stack arguments overlapping with where we're storing @@ -2759,7 +2767,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } else { SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); - DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); DstInfo = MachinePointerInfo::getStack(LocMemOffset); } @@ -2809,25 +2817,24 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, const GlobalValue *GV = G->getGlobal(); bool InternalLinkage = GV->hasInternalLinkage(); if (InternalLinkage) - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); else { - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, - AArch64II::MO_GOT); - Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee); + Callee = + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); - Callee = - DAG.getTargetExternalSymbol(Sym, getPointerTy(), AArch64II::MO_GOT); - Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); } } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0); } // We don't usually want to end the call-sequence here because we would tidy @@ -2977,7 +2984,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); const GlobalAddressSDNode *GN = 
cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GN->getGlobal(); @@ -3069,7 +3076,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); SDLoc DL(Op); - MVT PtrVT = getPointerTy(); + MVT PtrVT = getPointerTy(DAG.getDataLayout()); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue TLVPAddr = @@ -3124,7 +3131,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, /// the sequence is produced as per above. SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -3159,7 +3166,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, } SDValue TPOff; - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); const GlobalValue *GV = GA->getGlobal(); @@ -3786,7 +3793,7 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && @@ -3812,7 +3819,7 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large) { @@ -3853,7 +3860,7 @@ SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { @@ -3879,8 +3886,8 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, DAG.getMachineFunction().getInfo<AArch64FunctionInfo>(); SDLoc DL(Op); - SDValue FR = - DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), + getPointerTy(DAG.getDataLayout())); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); @@ -3892,6 +3899,7 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, // Standard, section B.3. 
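The LowerAAPCS_VASTART hunk that follows writes the five fields of the AAPCS64 va_list at offsets 0, 8, 16, 24 and 28, now through a single hoisted PtrVT. For reference, this is the C-level layout it materializes (field names per AAPCS64 section B.3; the static_asserts assume an LP64 host, and the ABI's leading underscores are dropped because such identifiers are reserved in C++):

```cpp
// The va_list layout targeted by LowerAAPCS_VASTART.
#include <cstddef>

struct AAPCS64VaList {
  void *stack;   // __stack:   next stacked argument             (offset 0)
  void *gr_top;  // __gr_top:  end of the GPR register save area (offset 8)
  void *vr_top;  // __vr_top:  end of the FP/SIMD save area      (offset 16)
  int gr_offs;   // __gr_offs: negative offset to next GPR arg   (offset 24)
  int vr_offs;   // __vr_offs: negative offset to next FP arg    (offset 28)
};

static_assert(offsetof(AAPCS64VaList, gr_top) == 8, "LP64 host assumed");
static_assert(offsetof(AAPCS64VaList, vr_top) == 16, "LP64 host assumed");
static_assert(offsetof(AAPCS64VaList, gr_offs) == 24, "LP64 host assumed");
static_assert(offsetof(AAPCS64VaList, vr_offs) == 28, "LP64 host assumed");

int main() {}
```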
MachineFunction &MF = DAG.getMachineFunction(); AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc DL(Op); SDValue Chain = Op.getOperand(0); @@ -3900,8 +3908,7 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SmallVector<SDValue, 4> MemOps; // void *__stack at offset 0 - SDValue Stack = - DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); + SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT); MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, MachinePointerInfo(SV), false, false, 8)); @@ -3910,12 +3917,12 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, if (GPRSize > 0) { SDValue GRTop, GRTopAddr; - GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(8, DL, getPointerTy())); + GRTopAddr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT)); - GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy()); - GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, - DAG.getConstant(GPRSize, DL, getPointerTy())); + GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT); + GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop, + DAG.getConstant(GPRSize, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, MachinePointerInfo(SV, 8), false, false, 8)); @@ -3925,28 +3932,28 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, int FPRSize = FuncInfo->getVarArgsFPRSize(); if (FPRSize > 0) { SDValue VRTop, VRTopAddr; - VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(16, DL, getPointerTy())); + VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(16, DL, PtrVT)); - VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy()); - VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, - DAG.getConstant(FPRSize, DL, getPointerTy())); + VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT); + VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop, + DAG.getConstant(FPRSize, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, MachinePointerInfo(SV, 16), false, false, 8)); } // int __gr_offs at offset 24 - SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(24, DL, getPointerTy())); + SDValue GROffsAddr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr, MachinePointerInfo(SV, 24), false, false, 4)); // int __vr_offs at offset 28 - SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(28, DL, getPointerTy())); + SDValue VROffsAddr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT)); MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr, MachinePointerInfo(SV, 28), false, @@ -3987,21 +3994,22 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); unsigned Align = Op.getConstantOperandVal(3); + auto PtrVT = getPointerTy(DAG.getDataLayout()); - SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr, - MachinePointerInfo(V), false, false, false, 0); + SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V), + false, false, false, 0); Chain = VAList.getValue(1); if (Align > 8) { assert(((Align & (Align - 1)) == 0) 
&& "Expected Align to be a power of 2"); - VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(Align - 1, DL, getPointerTy())); - VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList, - DAG.getConstant(-(int64_t)Align, DL, getPointerTy())); + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(Align - 1, DL, PtrVT)); + VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList, + DAG.getConstant(-(int64_t)Align, DL, PtrVT)); } Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); - uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy); + uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); // Scalar integer and FP values smaller than 64 bits are implicitly extended // up to 64 bits. At the very least, we have to increase the striding of the @@ -4016,8 +4024,8 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { } // Increment the pointer, VAList, to the next vaarg - SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(ArgSize, DL, getPointerTy())); + SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(ArgSize, DL, PtrVT)); // Store the incremented VAList to the legalized pointer SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V), false, false, 0); @@ -4057,8 +4065,8 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { unsigned Reg = StringSwitch<unsigned>(RegName) .Case("sp", AArch64::SP) .Default(0); @@ -4079,7 +4087,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); - SDValue Offset = DAG.getConstant(8, DL, getPointerTy()); + SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout())); return DAG.getLoad(VT, DL, DAG.getEntryNode(), DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); @@ -4232,7 +4240,7 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. 
AArch64TargetLowering::ConstraintType -AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { +AArch64TargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: @@ -4283,8 +4291,7 @@ AArch64TargetLowering::getSingleConstraintMatchWeight( std::pair<unsigned, const TargetRegisterClass *> AArch64TargetLowering::getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, const std::string &Constraint, - MVT VT) const { + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': @@ -4320,10 +4327,9 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( unsigned Size = Constraint.size(); if ((Size == 4 || Size == 5) && Constraint[0] == '{' && tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') { - const std::string Reg = - std::string(&Constraint[2], &Constraint[Size - 1]); - int RegNo = atoi(Reg.c_str()); - if (RegNo >= 0 && RegNo <= 31) { + int RegNo; + bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo); + if (!Failed && RegNo >= 0 && RegNo <= 31) { // v0 - v31 are aliases of q0 - q31. // By default we'll emit v0-v31 for this unless there's a modifier where // we'll emit the correct register as well. @@ -6429,6 +6435,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { + auto &DL = I.getModule()->getDataLayout(); switch (Intrinsic) { case Intrinsic::aarch64_neon_ld2: case Intrinsic::aarch64_neon_ld3: @@ -6444,7 +6451,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::aarch64_neon_ld4r: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. 
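In getRegForInlineAsmConstraint above, the digits of a "{v0}".."{v31}" constraint are now parsed with StringRef::getAsInteger, which reports failure, instead of copying into a std::string and calling atoi, which cannot. A standalone sketch of the same error-checked parse using std::string_view and std::from_chars rather than LLVM's StringRef:

```cpp
// Error-checked parse of a "{vN}" inline-asm constraint (N in 0..31).
// The real code also accepts an uppercase 'V' via tolower; omitted here.
#include <cassert>
#include <charconv>
#include <string_view>

bool parseVRegConstraint(std::string_view Constraint, int &RegNo) {
  size_t Size = Constraint.size();
  if ((Size != 4 && Size != 5) || Constraint.front() != '{' ||
      Constraint[1] != 'v' || Constraint.back() != '}')
    return false;
  std::string_view Digits = Constraint.substr(2, Size - 3);
  auto [Ptr, Ec] =
      std::from_chars(Digits.data(), Digits.data() + Digits.size(), RegNo);
  return Ec == std::errc() && Ptr == Digits.data() + Digits.size() &&
         RegNo >= 0 && RegNo <= 31;
}

int main() {
  int R;
  assert(parseVRegConstraint("{v7}", R) && R == 7);
  assert(parseVRegConstraint("{v31}", R) && R == 31);
  assert(!parseVRegConstraint("{vx}", R));   // atoi() would silently return 0 here
}
```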
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; @@ -6470,7 +6477,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeAllocSize(ArgTy) / 8; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); @@ -6488,7 +6495,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = true; Info.writeMem = false; @@ -6501,7 +6508,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = false; Info.writeMem = true; @@ -6572,7 +6579,8 @@ bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const { return true; const TargetOptions &Options = getTargetMachine().Options; - EVT VT = getValueType(User->getOperand(0)->getType()); + const DataLayout &DL = I->getModule()->getDataLayout(); + EVT VT = getValueType(DL, User->getOperand(0)->getType()); if (isFMAFasterThanFMulAndFAdd(VT) && isOperationLegalOrCustom(ISD::FMA, VT) && @@ -6637,6 +6645,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { break; case Instruction::GetElementPtr: { gep_type_iterator GTI = gep_type_begin(Instr); + auto &DL = Ext->getModule()->getDataLayout(); std::advance(GTI, U.getOperandNo()); Type *IdxTy = *GTI; // This extension will end up with a shift because of the scaling factor. @@ -6644,7 +6653,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const { // Get the shift amount based on the scaling factor: // log2(sizeof(IdxTy)) - log2(8). uint64_t ShiftAmt = - countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3; + countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3; // Is the constant foldable in the shift of the addressing mode? // I.e., shift amount is between 1 and 4 inclusive. if (ShiftAmt == 0 || ShiftAmt > 4) @@ -6708,10 +6717,10 @@ bool AArch64TargetLowering::lowerInterleavedLoad( assert(Shuffles.size() == Indices.size() && "Unmatched number of shufflevectors and indices"); - const DataLayout *DL = getDataLayout(); + const DataLayout &DL = LI->getModule()->getDataLayout(); VectorType *VecTy = Shuffles[0]->getType(); - unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy); + unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); // Skip illegal vector types. if (VecSize != 64 && VecSize != 128) @@ -6721,8 +6730,8 @@ bool AArch64TargetLowering::lowerInterleavedLoad( // load integer vectors first and then convert to pointer vectors. 
Type *EltTy = VecTy->getVectorElementType(); if (EltTy->isPointerTy()) - VecTy = VectorType::get(DL->getIntPtrType(EltTy), - VecTy->getVectorNumElements()); + VecTy = + VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace()); Type *Tys[2] = {VecTy, PtrTy}; @@ -6796,8 +6805,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, Type *EltTy = VecTy->getVectorElementType(); VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); - const DataLayout *DL = getDataLayout(); - unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy); + const DataLayout &DL = SI->getModule()->getDataLayout(); + unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); // Skip illegal vector types. if (SubVecSize != 64 && SubVecSize != 128) @@ -6810,7 +6819,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, // StN intrinsics don't support pointer vectors as arguments. Convert pointer // vectors to integer vectors. if (EltTy->isPointerTy()) { - Type *IntTy = DL->getIntPtrType(EltTy); + Type *IntTy = DL.getIntPtrType(EltTy); unsigned NumOpElts = dyn_cast<VectorType>(Op0->getType())->getVectorNumElements(); @@ -6894,8 +6903,8 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // AArch64 has five basic addressing modes: // reg @@ -6916,7 +6925,7 @@ bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12 uint64_t NumBytes = 0; if (Ty->isSized()) { - uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty); + uint64_t NumBits = DL.getTypeSizeInBits(Ty); NumBytes = NumBits / 8; if (!isPowerOf2_64(NumBits)) NumBytes = 0; @@ -6946,8 +6955,8 @@ bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, return false; } -int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty, +int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // Scaling factors are not free at all. // Operands | Rt Latency @@ -6956,7 +6965,7 @@ int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, // ------------------------------------------- // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5 // Rt, [Xn, Wm, <extend> #imm] | - if (isLegalAddressingMode(AM, Ty, AS)) + if (isLegalAddressingMode(DL, AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 if // it is not equal to 0 or 1. return AM.Scale != 0 && AM.Scale != 1; diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index 46298c0e7de1..c73ce1e54b3e 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -233,7 +233,7 @@ public: APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth = 0) const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override; + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; /// allowsMisalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses of the specified type. 
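isLegalAddressingMode above now sizes the access from the DataLayout passed in; the immediate forms it accepts are reg + imm9 (a signed 9-bit unscaled offset) and reg + SIZE_IN_BYTES * uimm12 (an unsigned 12-bit offset scaled by the access size). A hedged standalone sketch of that offset check, not the LLVM routine itself:

```cpp
// The two immediate addressing forms named in the comment above:
// a signed 9-bit unscaled offset, or an unsigned 12-bit multiple of the size.
#include <cassert>
#include <cstdint>

bool isLegalLoadStoreOffset(int64_t Offs, uint64_t NumBytes) {
  // reg + imm9: any byte offset in [-256, 255].
  if (Offs >= -256 && Offs <= 255)
    return true;
  // reg + SIZE_IN_BYTES * uimm12: non-negative multiple of the access size.
  if (NumBytes && Offs >= 0 && Offs % static_cast<int64_t>(NumBytes) == 0 &&
      Offs / static_cast<int64_t>(NumBytes) <= 4095)
    return true;
  return false;
}

int main() {
  assert(isLegalLoadStoreOffset(-16, 8));     // fits imm9
  assert(isLegalLoadStoreOffset(32760, 8));   // 4095 * 8, fits scaled uimm12
  assert(!isLegalLoadStoreOffset(32768, 8));  // 4096 * 8, out of range
  assert(!isLegalLoadStoreOffset(300, 8));    // too big for imm9, not a multiple of 8
}
```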
@@ -278,7 +278,8 @@ public: bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override; /// getSetCCResultType - Return the ISD::SETCC ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; @@ -323,7 +324,7 @@ public: /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; /// \brief Return the cost of the scaling factor used in the addressing @@ -331,7 +332,7 @@ public: /// of the specified type. /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty, + int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster @@ -471,9 +472,9 @@ private: std::vector<SDNode *> *Created) const override; bool combineRepeatedFPDivisors(unsigned NumUsers) const override; - ConstraintType - getConstraintType(const std::string &Constraint) const override; - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. @@ -483,14 +484,12 @@ private: std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "Q") return InlineAsm::Constraint_Q; // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index b73e0958df90..fa1a46acba84 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -894,6 +894,8 @@ def REVXr : OneXRegData<0b011, "rev", bswap>; def REV32Xr : OneXRegData<0b010, "rev32", UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>; +def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>; + // The bswap commutes with the rotr so we want a pattern for both possible // orders. 
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>; @@ -5283,18 +5285,23 @@ def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; @@ -5309,12 +5316,16 @@ def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>; +def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; let Predicates = [IsLE] in { def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 1836682e386e..841af55f7a65 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -90,7 +90,7 @@ AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, BitVector AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const AArch64FrameLowering *TFI = getFrameLowering(MF); // FIXME: avoid re-calculating this every time. 
BitVector Reserved(getNumRegs()); @@ -119,7 +119,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const AArch64FrameLowering *TFI = getFrameLowering(MF); switch (Reg) { default: @@ -198,11 +198,9 @@ bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const AArch64FrameLowering *TFI = getFrameLowering(MF); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget() - .getSubtargetImpl(*MF.getFunction()) - ->getFrameLowering() - ->getStackAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, @@ -213,8 +211,7 @@ AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - + const AArch64FrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP; } @@ -280,7 +277,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const AArch64FrameLowering *TFI = getFrameLowering(MF); MachineFrameInfo *MFI = MF.getFrameInfo(); // Estimate an offset from the frame pointer. 
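The register-info hunks above and below replace MF.getSubtarget().getFrameLowering() plus a static_cast at each call site with a single typed getFrameLowering(MF) accessor that already returns the AArch64FrameLowering. A standalone sketch of that cleanup, with illustrative stand-in names rather than LLVM's classes:

```cpp
// Stand-in classes only; the point is the single typed accessor that replaces a
// static_cast at every call site.
#include <cassert>

struct TargetFrameLoweringBase {
  virtual ~TargetFrameLoweringBase() = default;
};

struct AArch64LikeFrameLowering : TargetFrameLoweringBase {
  unsigned getStackAlignment() const { return 16; }
};

struct MachineFunctionStub {
  AArch64LikeFrameLowering FL;
  const TargetFrameLoweringBase *getGenericFrameLowering() const { return &FL; }
};

// The one place that performs the downcast.
const AArch64LikeFrameLowering &getFrameLowering(const MachineFunctionStub &MF) {
  return static_cast<const AArch64LikeFrameLowering &>(*MF.getGenericFrameLowering());
}

int main() {
  MachineFunctionStub MF;
  // Callers now reach derived-class queries such as getStackAlignment() directly.
  assert(getFrameLowering(MF).getStackAlignment() == 16);
}
```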
@@ -376,8 +373,7 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MBB.getParent(); const AArch64InstrInfo *TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo(); - const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>( - MF.getSubtarget().getFrameLowering()); + const AArch64FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned FrameReg; @@ -415,7 +411,7 @@ namespace llvm { unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const AArch64FrameLowering *TFI = getFrameLowering(MF); switch (RC->getID()) { default: diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index b9c53998752a..f40293021d74 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -16,11 +16,6 @@ using namespace llvm; #define DEBUG_TYPE "aarch64-selectiondag-info" -AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const DataLayout *DL) - : TargetSelectionDAGInfo(DL) {} - -AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {} - SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, @@ -37,8 +32,8 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { const AArch64TargetLowering &TLI = *STI.getTargetLowering(); - EVT IntPtr = TLI.getPointerTy(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h index 11932d2b1c22..97421b45b122 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -20,8 +20,6 @@ namespace llvm { class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit AArch64SelectionDAGInfo(const DataLayout *DL); - ~AArch64SelectionDAGInfo(); SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 554826b1e08a..486efd6ce3a2 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -49,15 +49,15 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), - InstrInfo(initializeSubtargetDependencies(FS)), - TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} + InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), + TLInfo(TM, *this) {} /// ClassifyGlobalReference - Find the target operand flags that describe /// how a global value should be referenced for the current subtarget. 
unsigned char AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { - bool isDecl = GV->isDeclarationForLinker(); + bool isDef = GV->isStrongDefinitionForLinker(); // MachO large model always goes via a GOT, simply to get a single 8-byte // absolute relocation on all global addresses. @@ -66,8 +66,7 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, // The small code mode's direct accesses use ADRP, which cannot necessarily // produce the value 0 (if the code is above 4GB). - if (TM.getCodeModel() == CodeModel::Small && - GV->isWeakForLinker() && isDecl) { + if (TM.getCodeModel() == CodeModel::Small && GV->hasExternalWeakLinkage()) { // In PIC mode use the GOT, but in absolute mode use a constant pool load. if (TM.getRelocationModel() == Reloc::Static) return AArch64II::MO_CONSTPOOL; @@ -85,8 +84,7 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, // defined could end up in unexpected places. Use a GOT. if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) { if (isTargetMachO()) - return (isDecl || GV->isWeakForLinker()) ? AArch64II::MO_GOT - : AArch64II::MO_NO_FLAG; + return isDef ? AArch64II::MO_NO_FLAG : AArch64II::MO_GOT; else // No need to go through the GOT for local symbols on ELF. return GV->hasLocalLinkage() ? AArch64II::MO_NO_FLAG : AArch64II::MO_GOT; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index fc91c94351cc..e085cca35f1c 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -181,8 +181,8 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); + EVT SrcTy = TLI->getValueType(DL, Src); + EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) return BaseT::getCastInstrCost(Opcode, Dst, Src); @@ -265,7 +265,7 @@ unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, if (Index != -1U) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. if (!LT.second.isVector()) @@ -289,7 +289,7 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost( TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. 
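In ClassifyGlobalReference above, the MachO decision is now keyed on GlobalValue::isStrongDefinitionForLinker(): a reference is direct only when the global is a strong definition in this module, and everything else, declarations and weak definitions alike, goes through the GOT. A hedged sketch of that predicate with stub types in place of GlobalValue:

```cpp
// Stub types only; mirrors "strong definition means not a declaration and not weak".
#include <cassert>

struct GlobalStub {
  bool IsDeclaration;
  bool IsWeak;
  bool isStrongDefinitionForLinker() const { return !IsDeclaration && !IsWeak; }
};

enum class Flag { NoFlag, GOT };

Flag classifyMachOReference(const GlobalStub &GV) {
  return GV.isStrongDefinitionForLinker() ? Flag::NoFlag : Flag::GOT;
}

int main() {
  assert(classifyMachOReference({false, false}) == Flag::NoFlag); // strong def: direct
  assert(classifyMachOReference({true, false}) == Flag::GOT);     // declaration: GOT
  assert(classifyMachOReference({false, true}) == Flag::GOT);     // weak def: GOT
}
```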
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -364,8 +364,8 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost } }; - EVT SelCondTy = TLI->getValueType(CondTy); - EVT SelValTy = TLI->getValueType(ValTy); + EVT SelCondTy = TLI->getValueType(DL, CondTy); + EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { int Idx = ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(), @@ -380,7 +380,7 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isIntegerTy(64)) { @@ -416,7 +416,7 @@ unsigned AArch64TTIImpl::getInterleavedMemoryOpCost( if (Factor <= TLI->getMaxSupportedInterleaveFactor()) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy); + unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy); // ldN/stN only support legal vector types of size 64 or 128 in bits. if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 4dabdadd8eeb..444d3ccc15e1 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -31,7 +31,6 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { typedef TargetTransformInfo TTI; friend BaseT; - const AArch64TargetMachine *TM; const AArch64Subtarget *ST; const AArch64TargetLowering *TLI; @@ -50,30 +49,15 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { public: explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F) - : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)), + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. 
AArch64TTIImpl(const AArch64TTIImpl &Arg) - : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST), - TLI(Arg.TLI) {} + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} AArch64TTIImpl(AArch64TTIImpl &&Arg) - : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)), - ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - AArch64TTIImpl &operator=(const AArch64TTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - TM = RHS.TM; - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - AArch64TTIImpl &operator=(AArch64TTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - TM = std::move(RHS.TM); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 359c2e734e21..db9fb0e775df 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -228,7 +228,7 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, } static MCSymbolizer * -createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, +createAArch64ExternalSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo, MCContext *Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo) { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index b5b1d1f9e19c..16d53569b231 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -199,7 +199,7 @@ MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, MCTargetStreamer * createAArch64ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); - if (TT.getObjectFormat() == Triple::ELF) + if (TT.isOSBinFormatELF()) return new AArch64TargetELFStreamer(S); return nullptr; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 099d1b01c339..9f7bed0d3b12 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -42,16 +42,13 @@ static MCInstrInfo *createAArch64MCInstrInfo() { static MCSubtargetInfo * createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - if (CPU.empty()) CPU = "generic"; - InitAArch64MCSubtargetInfo(X, TT, CPU, FS); - return X; + return createAArch64MCSubtargetInfoImpl(TT, CPU, FS); } -static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { +static MCRegisterInfo *createAArch64MCRegisterInfo(const Triple &Triple) { MCRegisterInfo *X = new MCRegisterInfo(); InitAArch64MCRegisterInfo(X, AArch64::LR); return X; @@ -75,11 +72,11 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createAArch64MCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { - Triple TheTriple(TT); - assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) 
&& + assert((TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()) && "Only expect Darwin and ELF targets"); if (CM == CodeModel::Default) @@ -94,7 +91,7 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, "Only small and large code models are allowed on AArch64"); // AArch64 Darwin is always PIC. - if (TheTriple.isOSDarwin()) + if (TT.isOSDarwin()) RM = Reloc::PIC_; // On ELF platforms the default static relocation model has a smart enough // linker to cope with referencing external symbols defined in a shared diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 569ad3844b25..ef8ef6268548 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -98,6 +98,16 @@ def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt", "true", "Enable SI load/store optimizer pass">; +// Performance debugging feature. Allow using DS instruction immediate +// offsets even if the base pointer can't be proven to be base. On SI, +// base pointer values that won't give the same result as a 16-bit add +// are not safe to fold, but this will override the conservative test +// for the base pointer. +def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding", + "EnableUnsafeDSOffsetFolding", + "true", + "Force using DS instruction immediate offsets on SI">; + def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "FlatAddressSpace", "true", diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp index 0b426bc63dd5..ad267d350850 100644 --- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp @@ -22,7 +22,6 @@ using namespace llvm; namespace { class AMDGPUAlwaysInline : public ModulePass { - static char ID; public: @@ -36,10 +35,9 @@ public: char AMDGPUAlwaysInline::ID = 0; bool AMDGPUAlwaysInline::runOnModule(Module &M) { + std::vector<Function *> FuncsToClone; - std::vector<Function*> FuncsToClone; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Function &F = *I; + for (Function &F : M) { if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() && !F.hasFnAttribute(Attribute::NoInline)) FuncsToClone.push_back(&F); @@ -49,12 +47,11 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) { ValueToValueMapTy VMap; Function *NewFunc = CloneFunction(F, VMap, false); NewFunc->setLinkage(GlobalValue::InternalLinkage); - F->getParent()->getFunctionList().push_back(NewFunc); + M.getFunctionList().push_back(NewFunc); F->replaceAllUsesWith(NewFunc); } - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Function &F = *I; + for (Function &F : M) { if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) { F.addFnAttr(Attribute::AlwaysInline); } diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index df4461eac4db..37b77d778d9f 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -110,8 +110,11 @@ private: SDValue &Offset, SDValue &GLC) const; SDNode *SelectAddrSpaceCast(SDNode *N); bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const; + bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, + SDValue &Clamp, SDValue &Omod) const; bool SelectVOP3Mods0Clamp(SDValue In, 
SDValue &Src, SDValue &SrcMods, SDValue &Omod) const; @@ -859,7 +862,8 @@ bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset, (OffsetBits == 8 && !isUInt<8>(Offset))) return false; - if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS || + Subtarget->unsafeDSOffsetFoldingEnabled()) return true; // On Southern Islands instruction with a negative base value and an offset @@ -1316,6 +1320,12 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, return true; } +bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + bool Res = SelectVOP3Mods(In, Src, SrcMods); + return Res && cast<ConstantSDNode>(SrcMods)->isNullValue(); +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Clamp, SDValue &Omod) const { @@ -1327,6 +1337,16 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, return SelectVOP3Mods(In, Src, SrcMods); } +bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src, + SDValue &SrcMods, SDValue &Clamp, + SDValue &Omod) const { + bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod); + + return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() && + cast<ConstantSDNode>(Clamp)->isNullValue() && + cast<ConstantSDNode>(Omod)->isNullValue(); +} + bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods, SDValue &Omod) const { @@ -1351,18 +1371,14 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() { do { IsModified = false; // Go over all selected nodes and try to fold them a bit more - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - - SDNode *Node = I; - - MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I); + for (SDNode &Node : CurDAG->allnodes()) { + MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); if (!MachineNode) continue; SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG); - if (ResNode != Node) { - ReplaceUses(Node, ResNode); + if (ResNode != &Node) { + ReplaceUses(&Node, ResNode); IsModified = true; } } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index d56838ec2019..3a65f3b56146 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -406,6 +406,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); + setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::SELECT_CC); @@ -444,7 +445,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, // Target Information //===----------------------------------------------------------------------===// -MVT AMDGPUTargetLowering::getVectorIdxTy() const { +MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const { return MVT::i32; } @@ -545,9 +546,8 @@ bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const { } bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const { - const DataLayout *DL = getDataLayout(); - unsigned SrcSize = DL->getTypeSizeInBits(Src->getScalarType()); - unsigned DestSize = DL->getTypeSizeInBits(Dest->getScalarType()); + unsigned SrcSize = Src->getScalarSizeInBits(); + unsigned DestSize = Dest->getScalarSizeInBits(); return SrcSize == 
32 && DestSize == 64; } @@ -697,7 +697,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, const SDValue &InitPtr, SDValue Chain, SelectionDAG &DAG) const { - const DataLayout *TD = getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); SDLoc DL(InitPtr); Type *InitTy = Init->getType(); @@ -705,20 +705,20 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, EVT VT = EVT::getEVT(InitTy); PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS); return DAG.getStore(Chain, DL, DAG.getConstant(*CI, DL, VT), InitPtr, - MachinePointerInfo(UndefValue::get(PtrTy)), false, false, - TD->getPrefTypeAlignment(InitTy)); + MachinePointerInfo(UndefValue::get(PtrTy)), false, + false, TD.getPrefTypeAlignment(InitTy)); } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) { EVT VT = EVT::getEVT(CFP->getType()); PointerType *PtrTy = PointerType::get(CFP->getType(), 0); return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, DL, VT), InitPtr, - MachinePointerInfo(UndefValue::get(PtrTy)), false, false, - TD->getPrefTypeAlignment(CFP->getType())); + MachinePointerInfo(UndefValue::get(PtrTy)), false, + false, TD.getPrefTypeAlignment(CFP->getType())); } if (StructType *ST = dyn_cast<StructType>(InitTy)) { - const StructLayout *SL = TD->getStructLayout(ST); + const StructLayout *SL = TD.getStructLayout(ST); EVT PtrVT = InitPtr.getValueType(); SmallVector<SDValue, 8> Chains; @@ -745,7 +745,7 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, else llvm_unreachable("Unexpected type"); - unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType()); + unsigned EltSize = TD.getTypeAllocSize(SeqTy->getElementType()); SmallVector<SDValue, 8> Chains; for (unsigned i = 0; i < NumElements; ++i) { SDValue Offset = DAG.getConstant(i * EltSize, DL, PtrVT); @@ -762,8 +762,8 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, EVT VT = EVT::getEVT(InitTy); PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS); return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr, - MachinePointerInfo(UndefValue::get(PtrTy)), false, false, - TD->getPrefTypeAlignment(InitTy)); + MachinePointerInfo(UndefValue::get(PtrTy)), false, + false, TD.getPrefTypeAlignment(InitTy)); } Init->dump(); @@ -785,7 +785,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, SDValue Op, SelectionDAG &DAG) const { - const DataLayout *TD = getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = G->getGlobal(); @@ -801,7 +801,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, unsigned Offset; if (MFI->LocalMemoryObjects.count(GV) == 0) { - uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); Offset = MFI->LDSSize; MFI->LocalMemoryObjects[GV] = Offset; // XXX: Account for alignment? 
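Just above, the LOCAL_ADDRESS path hands each LDS global a byte offset taken from a running counter in the machine-function info, with the size now computed through DAG.getDataLayout(). A stripped-down, standalone model of that placement scheme; the growth of the running total sits in elided context, so treat that part as an assumption, and alignment is deliberately ignored, which is exactly what the XXX comment above flags:

    #include <cstdint>
    #include <map>

    // Toy LDS placement: an object is placed at the current end of the block
    // and the block grows by the object's alloc size. No alignment handling.
    struct LDSAllocatorSketch {
      uint64_t LDSSize = 0;
      std::map<const void *, uint64_t> Offsets; // keyed by the global object

      uint64_t assign(const void *GV, uint64_t AllocSize) {
        auto It = Offsets.find(GV);
        if (It != Offsets.end())
          return It->second;        // already placed once
        uint64_t Offset = LDSSize;  // place at the current end
        Offsets[GV] = Offset;
        LDSSize += AllocSize;       // assumed growth of the running total
        return Offset;
      }
    };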
@@ -811,16 +811,16 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, } return DAG.getConstant(Offset, SDLoc(Op), - getPointerTy(AMDGPUAS::LOCAL_ADDRESS)); + getPointerTy(DL, AMDGPUAS::LOCAL_ADDRESS)); } case AMDGPUAS::CONSTANT_ADDRESS: { MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); Type *EltType = GV->getType()->getElementType(); - unsigned Size = TD->getTypeAllocSize(EltType); - unsigned Alignment = TD->getPrefTypeAlignment(EltType); + unsigned Size = DL.getTypeAllocSize(EltType); + unsigned Alignment = DL.getPrefTypeAlignment(EltType); - MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS); - MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS); + MVT PrivPtrVT = getPointerTy(DL, AMDGPUAS::PRIVATE_ADDRESS); + MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); int FI = FrameInfo->CreateStackObject(Size, Alignment, false); SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT); @@ -1653,7 +1653,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool // fb = fabs(fb); fb = DAG.getNode(ISD::FABS, DL, FltVT, fb); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), VT); + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); // int cv = fr >= fb; SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE); @@ -1960,7 +1960,8 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT); SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); @@ -2020,7 +2021,8 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32); @@ -2051,7 +2053,8 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51"); SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT); return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); @@ -2081,7 +2084,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const SDValue SignOne = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, One, X); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32); SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE); @@ -2100,8 +2104,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND64(SDValue Op, SelectionDAG &DAG) const const SDValue One = DAG.getConstant(1, SL, MVT::i32); const SDValue NegOne = DAG.getConstant(-1, SL, MVT::i32); const SDValue FiftyOne = DAG.getConstant(51, SL, MVT::i32); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32); - + EVT SetCCVT = + 
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X); @@ -2172,7 +2176,8 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT); SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); @@ -2411,6 +2416,33 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, SN->getBasePtr(), SN->getMemOperand()); } +SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + if (N->getValueType(0) != MVT::i64) + return SDValue(); + + // i64 (shl x, 32) -> (build_pair 0, x) + + // Doing this with moves theoretically helps MI optimizations that understand + // copies. 2 v_mov_b32_e32 will have the same code size / cycle count as + // v_lshl_b64. In the SALU case, I think this is slightly worse since it + // doubles the code size and I'm unsure about cycle count. + const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!RHS || RHS->getZExtValue() != 32) + return SDValue(); + + SDValue LHS = N->getOperand(0); + + SDLoc SL(N); + SelectionDAG &DAG = DCI.DAG; + + // Extract low 32-bits. + SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS); + + const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); + return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Zero, Lo); +} + SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const { EVT VT = N->getValueType(0); @@ -2448,17 +2480,24 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, SDLoc DL(N); switch(N->getOpcode()) { - default: break; - case ISD::MUL: - return performMulCombine(N, DCI); - case AMDGPUISD::MUL_I24: - case AMDGPUISD::MUL_U24: { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - simplifyI24(N0, DCI); - simplifyI24(N1, DCI); - return SDValue(); - } + default: + break; + case ISD::SHL: { + if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) + break; + + return performShlCombine(N, DCI); + } + case ISD::MUL: + return performMulCombine(N, DCI); + case AMDGPUISD::MUL_I24: + case AMDGPUISD::MUL_U24: { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + simplifyI24(N0, DCI); + simplifyI24(N1, DCI); + return SDValue(); + } case ISD::SELECT: { SDValue Cond = N->getOperand(0); if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) { @@ -2644,6 +2683,18 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, return DAG.getRegister(VirtualRegister, VT); } +uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( + const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const { + uint64_t ArgOffset = MFI->ABIArgOffset; + switch (Param) { + case GRID_DIM: + return ArgOffset; + case GRID_OFFSET: + return ArgOffset + 4; + } + llvm_unreachable("unexpected implicit parameter type"); +} + #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index fbb7d3c88437..478b2035fd75 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -65,6 
+65,7 @@ private: SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; protected: @@ -123,7 +124,7 @@ public: bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; - MVT getVectorIdxTy() const override; + MVT getVectorIdxTy(const DataLayout &) const override; bool isSelectSupported(SelectSupportKind) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; @@ -207,6 +208,16 @@ public: virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const; + + enum ImplicitParameter { + GRID_DIM, + GRID_OFFSET + }; + + /// \brief Helper function that returns the byte offset of the given + /// type of implicit parameter. + unsigned getImplicitParameterOffset(const AMDGPUMachineFunction *MFI, + const ImplicitParameter Param) const; }; namespace AMDGPUISD { diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 0779d1d786b2..bd5abc4f546e 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -69,6 +69,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FP64Denormals(false), FP32Denormals(false), FastFMAF32(false), CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), + EnableUnsafeDSOffsetFolding(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 30f50eb1d2f3..90831bfb4458 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -76,6 +76,7 @@ private: bool EnablePromoteAlloca; bool EnableIfCvt; bool EnableLoadStoreOpt; + bool EnableUnsafeDSOffsetFolding; unsigned WavefrontSize; bool CFALUBug; int LocalMemorySize; @@ -222,6 +223,10 @@ public: return EnableLoadStoreOpt; } + bool unsafeDSOffsetFoldingEnabled() const { + return EnableUnsafeDSOffsetFolding; + } + unsigned getWavefrontSize() const { return WavefrontSize; } diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index a9a911a8efed..2297b52b423c 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -156,8 +156,10 @@ public: } // End of anonymous namespace TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &F) { return TargetTransformInfo(AMDGPUTTIImpl(this)); }); + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo( + AMDGPUTTIImpl(this, F.getParent()->getDataLayout())); + }); } void AMDGPUPassConfig::addIRPasses() { @@ -269,6 +271,7 @@ void GCNPassConfig::addPreRegAlloc() { // also need extra copies to the address operand to be eliminated. 
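The performShlCombine introduced above rests on a simple bit-level identity: viewing an i64 as two 32-bit halves, a shift left by exactly 32 moves the low half into the high half and zeroes the low half, which is precisely what (build_pair 0, (trunc x)) builds. Note the combine only fires once legalization is done, per the getDAGCombineLevel() guard in PerformDAGCombine. A standalone check of the identity in plain C++, independent of the SelectionDAG machinery:

    #include <cassert>
    #include <cstdint>

    // i64 (shl x, 32) ...
    static uint64_t shlBy32(uint64_t X) { return X << 32; }

    // ... equals build_pair(lo = 0, hi = trunc-to-i32 of x).
    static uint64_t buildPairZeroLo(uint64_t X) {
      uint32_t Lo = static_cast<uint32_t>(X);    // ISD::TRUNCATE
      return static_cast<uint64_t>(Lo) << 32;    // hi = Lo, lo = 0
    }

    int main() {
      for (uint64_t X : {0ull, 1ull, 0x00000001FFFFFFFFull, ~0ull})
        assert(shlBy32(X) == buildPairZeroLo(X));
    }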
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); + insertPass(&MachineSchedulerID, &RegisterCoalescerID); } addPass(createSIShrinkInstructionsPass(), false); addPass(createSIFixSGPRLiveRangesPass(), false); @@ -280,10 +283,10 @@ void GCNPassConfig::addPostRegAlloc() { } void GCNPassConfig::addPreSched2() { - addPass(createSIInsertWaits(*TM), false); } void GCNPassConfig::addPreEmitPass() { + addPass(createSIInsertWaits(*TM), false); addPass(createSILowerControlFlowPass(*TM), false); } diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index 791c84e6f28b..dee0a69d1e68 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -37,8 +37,9 @@ class AMDGPUTTIImpl : public BasicTTIImplBase<AMDGPUTTIImpl> { const AMDGPUTargetLowering *getTLI() const { return TLI; } public: - explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM) - : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} + explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const DataLayout &DL) + : BaseT(TM, DL), ST(TM->getSubtargetImpl()), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg) @@ -46,18 +47,6 @@ public: AMDGPUTTIImpl(AMDGPUTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - AMDGPUTTIImpl &operator=(const AMDGPUTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - AMDGPUTTIImpl &operator=(AMDGPUTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } bool hasBranchDivergence() { return true; } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 7172e4bb9335..c709741f3777 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -44,7 +44,7 @@ static MCInstrInfo *createAMDGPUMCInstrInfo() { return X; } -static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitAMDGPUMCRegisterInfo(X, 0); return X; @@ -52,14 +52,13 @@ static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo * X = new MCSubtargetInfo(); - InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { +static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); X->initMCCodeGenInfo(RM, CM, OL); return X; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 8357b6d9d0ed..4e4d554f0ee7 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -815,8 +815,10 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case 
Intrinsic::r600_read_local_size_z: return LowerImplicitParameter(DAG, VT, DL, 8); - case Intrinsic::AMDGPU_read_workdim: - return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4); + case Intrinsic::AMDGPU_read_workdim: { + uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM); + return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4); + } case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, @@ -897,8 +899,9 @@ SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG, for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) { - Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector, - DAG.getConstant(i, DL, getVectorIdxTy()))); + Args.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector, + DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout())))); } return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args); @@ -1459,22 +1462,17 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const SDValue Ptr = Op.getOperand(1); SDValue LoweredLoad; - SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG); - if (Ret.getNode()) { - SDValue Ops[2] = { - Ret, - Chain - }; - return DAG.getMergeValues(Ops, DL); - } + if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG)) + return Ret; // Lower loads constant address space global variable loads if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && isa<GlobalVariable>(GetUnderlyingObject( - LoadNode->getMemOperand()->getValue(), *getDataLayout()))) { + LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) { - SDValue Ptr = DAG.getZExtOrTrunc(LoadNode->getBasePtr(), DL, - getPointerTy(AMDGPUAS::PRIVATE_ADDRESS)); + SDValue Ptr = DAG.getZExtOrTrunc( + LoadNode->getBasePtr(), DL, + getPointerTy(DAG.getDataLayout(), AMDGPUAS::PRIVATE_ADDRESS)); Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32)); return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(), @@ -1702,7 +1700,8 @@ SDValue R600TargetLowering::LowerFormalArguments( return Chain; } -EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index c06d3c4fd309..4dbac97af2a1 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -38,7 +38,9 @@ public: const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; - EVT getSetCCResultType(LLVMContext &, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const override; + private: unsigned Gen; /// Each OpenCL kernel has nine implicit parameters that are stored in the diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index d14e37a64612..c2887255cc11 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -126,11 +126,42 @@ static bool updateOperand(FoldCandidate &Fold, return false; } +static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList, + const MachineInstr *MI) { + for (auto Candidate : FoldList) { + if (Candidate.UseMI == MI) + return true; + } + return false; +} + static bool tryAddToFoldList(std::vector<FoldCandidate> 
&FoldList, MachineInstr *MI, unsigned OpNo, MachineOperand *OpToFold, const SIInstrInfo *TII) { if (!TII->isOperandLegal(MI, OpNo, OpToFold)) { + + // Special case for v_mac_f32_e64 if we are trying to fold into src2 + unsigned Opc = MI->getOpcode(); + if (Opc == AMDGPU::V_MAC_F32_e64 && + (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) { + // Check if changing this to a v_mad_f32 instruction will allow us to + // fold the operand. + MI->setDesc(TII->get(AMDGPU::V_MAD_F32)); + bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII); + if (FoldAsMAD) { + MI->untieRegOperand(OpNo); + return true; + } + MI->setDesc(TII->get(Opc)); + } + + // If we are already folding into another operand of MI, then + // we can't commute the instruction, otherwise we risk making the + // other fold illegal. + if (isUseMIInFoldList(FoldList, MI)) + return false; + // Operand is not legal, so try to commute the instruction to // see if this makes it possible to fold. unsigned CommuteIdx0; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index ead1a3743473..dd818a9ba746 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -254,8 +254,9 @@ bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &, return false; } -bool SITargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, unsigned AS) const { +bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { // No global is ever allowed as a base. if (AM.BaseGV) return false; @@ -416,7 +417,7 @@ static EVT toIntegerVT(EVT VT) { SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc SL, SDValue Chain, unsigned Offset, bool Signed) const { - const DataLayout *DL = getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); MachineFunction &MF = DAG.getMachineFunction(); const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo()); @@ -425,16 +426,16 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, Type *Ty = VT.getTypeForEVT(*DAG.getContext()); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - MVT PtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS); + MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, MRI.getLiveInVirtReg(InputPtrReg), PtrVT); SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, DAG.getConstant(Offset, SL, PtrVT)); - SDValue PtrOffset = DAG.getUNDEF(getPointerTy(AMDGPUAS::CONSTANT_ADDRESS)); + SDValue PtrOffset = DAG.getUNDEF(PtrVT); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); - unsigned Align = DL->getABITypeAlignment(Ty); + unsigned Align = DL.getABITypeAlignment(Ty); if (VT != MemVT && VT.isFloatingPoint()) { // Do an integer load and convert. @@ -451,7 +452,12 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, true, // isNonTemporal true, // isInvariant Align); // Alignment - return DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load); + SDValue Ops[] = { + DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load), + Load.getValue(1) + }; + + return DAG.getMergeValues(Ops, SL); } ISD::LoadExtType ExtTy = Signed ? 
ISD::SEXTLOAD : ISD::ZEXTLOAD; @@ -569,6 +575,8 @@ SDValue SITargetLowering::LowerFormalArguments( AnalyzeFormalArguments(CCInfo, Splits); + SmallVector<SDValue, 16> Chains; + for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) { const ISD::InputArg &Arg = Ins[i]; @@ -587,8 +595,9 @@ SDValue SITargetLowering::LowerFormalArguments( VA.getLocMemOffset(); // The first 36 bytes of the input buffer contains information about // thread group and global sizes. - SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), + SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, Chain, Offset, Ins[i].Flags.isSExt()); + Chains.push_back(Arg.getValue(1)); const PointerType *ParamTy = dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex())); @@ -614,7 +623,8 @@ SDValue SITargetLowering::LowerFormalArguments( Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, &AMDGPU::SReg_64RegClass); Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass); - InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); + SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT); + InVals.push_back(Copy); continue; } @@ -634,7 +644,9 @@ SDValue SITargetLowering::LowerFormalArguments( for (unsigned j = 1; j != NumElements; ++j) { Reg = ArgLocs[ArgIdx++].getLocReg(); Reg = MF.addLiveIn(Reg, RC); - Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); + + SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT); + Regs.push_back(Copy); } // Fill up the missing vector elements @@ -653,7 +665,11 @@ SDValue SITargetLowering::LowerFormalArguments( AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs())); Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx); } - return Chain; + + if (Chains.empty()) + return Chain; + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( @@ -695,14 +711,15 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const { return true; } -EVT SITargetLowering::getSetCCResultType(LLVMContext &Ctx, EVT VT) const { +EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, + EVT VT) const { if (!VT.isVector()) { return MVT::i1; } return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); } -MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { +MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT) const { return MVT::i32; } @@ -888,7 +905,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDLoc DL(GSD); const GlobalValue *GV = GSD->getGlobal(); - MVT PtrVT = getPointerTy(GSD->getAddressSpace()); + MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace()); SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); @@ -926,6 +943,7 @@ SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); + auto MFI = MF.getInfo<SIMachineFunctionInfo>(); const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); @@ -964,8 +982,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::AMDGPU_read_workdim: return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset, - false); + getImplicitParameterOffset(MFI, GRID_DIM), false); case Intrinsic::r600_read_tgid_x: return 
CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, @@ -1213,7 +1230,8 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32); - EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f32); + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32); SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT); @@ -1411,7 +1429,7 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N, unsigned AS = Load->getAddressSpace(); unsigned Align = Load->getAlignment(); Type *Ty = LoadVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); // Don't try to replace the load if we have to expand it due to alignment // problems. Otherwise we will end up scalarizing the load, and trying to @@ -2212,9 +2230,8 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, std::pair<unsigned, const TargetRegisterClass *> SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint_, + StringRef Constraint, MVT VT) const { - StringRef Constraint(Constraint_); if (Constraint == "r") { switch(VT.SimpleTy) { default: llvm_unreachable("Unhandled type for 'r' inline asm constraint"); diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index a956b013bdb1..635b4edc89de 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -62,8 +62,8 @@ public: bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/, EVT /*VT*/) const override; - bool isLegalAddressingMode(const AddrMode &AM, - Type *Ty, unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, + unsigned AS) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, @@ -90,8 +90,9 @@ public: MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, MachineBasicBlock * BB) const override; bool enableAggressiveFMAFusion(EVT VT) const override; - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; - MVT getScalarShiftAmountTy(EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override; bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; @@ -114,9 +115,9 @@ public: SDLoc DL, SDValue Ptr) const; - std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, - const std::string &Constraint, MVT VT) const override; + std::pair<unsigned, const TargetRegisterClass *> + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const; }; diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index eb96bd0227b2..18910615bebe 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -227,9 +227,8 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, uint8_t Offset0 = Offset0Imm->getImm(); uint8_t Offset1 = Offset1Imm->getImm(); - assert(Offset1 > Offset0); - if (Offset1 - 
Offset0 == 1) { + if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { // Each of these offsets is in element sized units, so we need to convert // to bytes of the individual reads. @@ -924,7 +923,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, return false; unsigned Opc = UseMI->getOpcode(); - if (Opc == AMDGPU::V_MAD_F32) { + if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) { // Don't fold if we are using source modifiers. The new VOP2 instructions // don't have them. if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) || @@ -963,9 +962,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, // instead of having to modify in place. // Remove these first since they are at the end. - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); unsigned Src1Reg = Src1->getReg(); @@ -980,6 +979,14 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, Src1->setSubReg(Src2SubReg); Src1->setIsKill(Src2->isKill()); + if (Opc == AMDGPU::V_MAC_F32_e64) { + UseMI->untieRegOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + } + + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, + AMDGPU::OpName::src2)); + // ChangingToImmediate adds Src2 back to the instruction. Src2->ChangeToImmediate(Imm); removeModOperands(*UseMI); @@ -1010,11 +1017,17 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, // instead of having to modify in place. // Remove these first since they are at the end. - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32, + UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); + if (Opc == AMDGPU::V_MAC_F32_e64) { + UseMI->untieRegOperand( + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); + } + + // ChangingToImmediate adds Src2 back to the instruction. Src2->ChangeToImmediate(Imm); // These come before src2. 
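Several of the hunks around here (the fold-list special case, the FoldImmediate changes, and the convertToThreeAddress hook coming up) lean on the relationship between the two opcodes: v_mac_f32 is the two-address form whose destination doubles as the tied src2 accumulator, while v_mad_f32 takes an independent third source. A per-lane scalar model of why rewriting one as the other preserves the value (rounding-mode subtleties ignored; this is a sketch, not a statement of the exact hardware semantics):

    // Per-lane view:
    //   v_mac_f32 d, a, b      -- d = a * b + d   (dst tied to src2)
    //   v_mad_f32 d, a, b, c   -- d = a * b + c   (free third operand)
    static float macSketch(float D, float A, float B) { return A * B + D; }
    static float madSketch(float A, float B, float C) { return A * B + C; }
    // macSketch(D, A, B) == madSketch(A, B, D), so switching the opcode and
    // untying src2 keeps the computed value while freeing src2 for folding.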
@@ -1126,6 +1139,38 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, return false; } +MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, + MachineBasicBlock::iterator &MI, + LiveVariables *LV) const { + + switch (MI->getOpcode()) { + default: return nullptr; + case AMDGPU::V_MAC_F32_e64: break; + case AMDGPU::V_MAC_F32_e32: { + const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); + if (Src0->isImm() && !isInlineConstant(*Src0, 4)) + return nullptr; + break; + } + } + + const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst); + const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0); + const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1); + const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2); + + return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32)) + .addOperand(*Dst) + .addImm(0) // Src0 mods + .addOperand(*Src0) + .addImm(0) // Src1 mods + .addOperand(*Src1) + .addImm(0) // Src mods + .addOperand(*Src2) + .addImm(0) // clamp + .addImm(0); // omod +} + bool SIInstrInfo::isInlineConstant(const APInt &Imm) const { int64_t SVal = Imm.getSExtValue(); if (SVal >= -16 && SVal <= 64) @@ -1625,7 +1670,10 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, if (MO->isReg()) { assert(DefinedRC); - const TargetRegisterClass *RC = MRI.getRegClass(MO->getReg()); + const TargetRegisterClass *RC = + TargetRegisterInfo::isVirtualRegister(MO->getReg()) ? + MRI.getRegClass(MO->getReg()) : + RI.getPhysRegClass(MO->getReg()); // In order to be legal, the common sub-class must be equal to the // class of the current operand. For example: diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 0382272068d2..015ea12d4598 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -144,6 +144,10 @@ public: unsigned getMachineCSELookAheadLimit() const override { return 500; } + MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB, + MachineBasicBlock::iterator &MI, + LiveVariables *LV) const override; + bool isSALU(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SALU; } diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index fcb58d5da3b0..b39a78714640 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -529,9 +529,11 @@ def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">; def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">; def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">; +def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">; def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">; def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">; def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">; +def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">; //===----------------------------------------------------------------------===// // SI assembler operands @@ -1113,6 +1115,13 @@ def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> { field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2); field string Asm = "$dst, $src0, $vsrc1, $src2"; } +def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); + let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3, + HasModifiers>.ret; + let Asm32 = 
getAsm32<2>.ret; + let Asm64 = getAsm64<2, HasModifiers>.ret; +} def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 8c8d836776db..1ee63c675822 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -1488,7 +1488,10 @@ defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>; defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>; defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>; -defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>; +let Constraints = "$dst = $src2", DisableEncoding="$src2", + isConvertibleToThreeAddress = 1 in { +defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_MAC>; +} } // End isCommutable = 1 defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">; @@ -2206,6 +2209,15 @@ def : Pat < (V_CNDMASK_B32_e64 $src2, $src1, $src0) >; +// Pattern for V_MAC_F32 +def : Pat < + (fmad (VOP3NoMods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod), + (VOP3NoMods f32:$src1, i32:$src1_modifiers), + (VOP3NoMods f32:$src2, i32:$src2_modifiers)), + (V_MAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1, + $src2_modifiers, $src2, $clamp, $omod) +>; + /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 9b1d256dc5a8..1bdb1f0ee9f9 100644 --- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -214,12 +214,11 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( // cases, like vectors of pointers. const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr); - unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg(); - unsigned DestReg1 - = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst)->getReg(); + const MachineOperand *Dest0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst); + const MachineOperand *Dest1 = TII->getNamedOperand(*Paired, AMDGPU::OpName::vdst); unsigned Offset0 - = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff; + = TII->getNamedOperand(*I, AMDGPU::OpName::offset)->getImm() & 0xffff; unsigned Offset1 = TII->getNamedOperand(*Paired, AMDGPU::OpName::offset)->getImm() & 0xffff; @@ -258,20 +257,43 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair( unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1; unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3; - updateRegDefsUses(DestReg0, DestReg, SubRegIdx0); - updateRegDefsUses(DestReg1, DestReg, SubRegIdx1); - LIS->RemoveMachineInstrFromMaps(I); - // Replacing Paired in the maps with Read2 allows us to avoid updating the - // live range for the m0 register. - LIS->ReplaceMachineInstrInMaps(Paired, Read2); + const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY); + + // Copy to the old destination registers. + MachineInstr *Copy0 = BuildMI(*MBB, I, DL, CopyDesc) + .addOperand(*Dest0) // Copy to same destination including flags and sub reg. 
+ .addReg(DestReg, 0, SubRegIdx0); + MachineInstr *Copy1 = BuildMI(*MBB, I, DL, CopyDesc) + .addOperand(*Dest1) + .addReg(DestReg, RegState::Kill, SubRegIdx1); + + LIS->InsertMachineInstrInMaps(Read2); + + // repairLiveintervalsInRange() doesn't handle physical register, so we have + // to update the M0 range manually. + SlotIndex PairedIndex = LIS->getInstructionIndex(Paired); + LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI)); + LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex); + bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot(); + + // The new write to the original destination register is now the copy. Steal + // the old SlotIndex. + LIS->ReplaceMachineInstrInMaps(I, Copy0); + LIS->ReplaceMachineInstrInMaps(Paired, Copy1); + I->eraseFromParent(); Paired->eraseFromParent(); LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg()); LIS->shrinkToUses(&AddrRegLI); - LIS->getInterval(DestReg); // Create new LI + LIS->createAndComputeVirtRegInterval(DestReg); + + if (UpdateM0Range) { + SlotIndex Read2Index = LIS->getInstructionIndex(Read2); + M0Segment->end = Read2Index.getRegSlot(); + } DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n'); return Read2.getInstr(); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 587ea63d6796..d23b92edef33 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -53,7 +53,6 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( if (!LaneVGPRs.count(LaneVGPRIdx)) { unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); LaneVGPRs[LaneVGPRIdx] = LaneVGPR; - MRI.setPhysRegUsed(LaneVGPR); // Add this register as live-in to all blocks to avoid machine verifer // complaining about use of an undefined physical register. diff --git a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp index 0a7f684552f0..b086d2ed6652 100644 --- a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp +++ b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp @@ -91,7 +91,6 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { if (ScratchOffsetReg != AMDGPU::NoRegister) { // Found an SGPR to use - MRI.setPhysRegUsed(ScratchOffsetReg); BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg) .addReg(ScratchOffsetPreloadReg); } else { diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index db2ff0b1f952..ce4acafac9fa 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -499,7 +499,7 @@ unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { - if (!MRI.isPhysRegUsed(*I)) + if (MRI.reg_nodbg_empty(*I)) return *I; } return AMDGPU::NoRegister; diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 51e72cdb5f9e..5d00bdd6a9bb 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -94,8 +94,20 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII, // is vcc. We should handle this the same way we handle vopc, by addding // a register allocation hint pre-regalloc and then do the shrining // post-regalloc. 
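For context on the src2 special-casing above: in the 32-bit VOP2 encoding the select condition is not an explicit operand at all; v_cndmask_b32_e32 implicitly reads VCC, while the e64 form can name an arbitrary condition register. That is why the shrinking pass, in the hunk that follows, hints the virtual condition register toward VCC rather than shrinking the instruction on the spot. A per-lane scalar model of the select itself, as a sketch that ignores the wave-wide mask aspect:

    // Per-lane view of v_cndmask_b32: take src1 when the lane's condition bit
    // is set, src0 otherwise. In the e32 encoding that bit comes from VCC.
    static float cndmaskSketch(bool CondBit, float S0, float S1) {
      return CondBit ? S1 : S0;
    }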
- if (Src2) - return false; + if (Src2) { + switch (MI.getOpcode()) { + default: return false; + + case AMDGPU::V_MAC_F32_e64: + if (!isVGPR(Src2, TRI, MRI) || + TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers)) + return false; + break; + + case AMDGPU::V_CNDMASK_B32_e64: + break; + } + } const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); const MachineOperand *Src1Mod = @@ -149,7 +161,7 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, return; // Try to fold Src0 - if (Src0.isReg()) { + if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) { unsigned Reg = Src0.getReg(); MachineInstr *Def = MRI.getUniqueVRegDef(Reg); if (Def && Def->isMoveImmediate()) { @@ -243,6 +255,22 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { continue; } + if (Op32 == AMDGPU::V_CNDMASK_B32_e32) { + // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC + // instructions. + const MachineOperand *Src2 = + TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (!Src2->isReg()) + continue; + unsigned SReg = Src2->getReg(); + if (TargetRegisterInfo::isVirtualRegister(SReg)) { + MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC); + continue; + } + if (SReg != AMDGPU::VCC) + continue; + } + // We can shrink this instruction DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';); @@ -259,6 +287,11 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (Src1) Inst32.addOperand(*Src1); + const MachineOperand *Src2 = + TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (Src2) + Inst32.addOperand(*Src2); + ++NumInstructionsShrunk; MI.eraseFromParent(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 96b4742da2bb..ef609a66d032 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -150,6 +150,10 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", "NaCl trap">; +def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true", + "Generate calls via indirect call " + "instructions">; + // ARM ISAs. def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true", "Support ARM v4T instructions">; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index b1a11d626bda..9f43e732bd73 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1230,8 +1230,7 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { Reloc::Model RM = MF.getTarget().getRelocationModel(); if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { - assert(getSubtarget().getTargetTriple().getObjectFormat() == - Triple::MachO && + assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() && "LOAD_STACK_GUARD currently supported only for MachO."); expandLoadStackGuard(MI, RM); MI->getParent()->erase(MI); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 3f79a9b53d70..e7d5be7753e4 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -127,7 +127,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); - const TargetFrameLowering *TFI = STI.getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); // FIXME: avoid re-calculating this every time. 
BitVector Reserved(getNumRegs()); @@ -194,7 +194,7 @@ unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); - const TargetFrameLowering *TFI = STI.getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); switch (RC->getID()) { default: @@ -302,7 +302,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg, bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the @@ -333,6 +333,7 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + const ARMFrameLowering *TFI = getFrameLowering(MF); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or @@ -347,7 +348,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. - if (MF.getSubtarget().getFrameLowering()->hasReservedCallFrame(MF)) + if (TFI->hasReservedCallFrame(MF)) return true; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. @@ -357,9 +358,9 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const ARMFrameLowering *TFI = getFrameLowering(MF); const Function *F = MF.getFunction(); - unsigned StackAlign = - MF.getSubtarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttribute(Attribute::StackAlignment)); @@ -378,7 +379,7 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); - const TargetFrameLowering *TFI = STI.getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); if (TFI->hasFP(MF)) return getFramePointerReg(STI); @@ -517,7 +518,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. 
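The needsStackRealignment change above keeps the predicate itself untouched: realign when the largest stack-object alignment exceeds what the frame lowering guarantees for the stack pointer, or when the function carries an explicit stack-alignment attribute. A tiny worked example (the 8-byte figure is only an illustrative stack alignment, not something taken from this patch):

    // Toy restatement of the predicate computed above.
    static bool needsRealignmentSketch(unsigned StackAlign, unsigned MaxObjAlign,
                                       bool HasStackAlignAttr) {
      return MaxObjAlign > StackAlign || HasStackAlignAttr;
    }
    // needsRealignmentSketch(/*StackAlign=*/8, /*MaxObjAlign=*/32, false) is true:
    // a 32-byte aligned stack object forces dynamic realignment of an 8-byte stack.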
MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const ARMFrameLowering *TFI = getFrameLowering(MF); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -694,8 +695,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); - const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>( - MF.getSubtarget().getFrameLowering()); + const ARMFrameLowering *TFI = getFrameLowering(MF); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 7dd21ecbe91b..27cf06b995a0 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -142,6 +142,9 @@ def CC_ARM_AAPCS : CallingConv<[ // Handles byval parameters. CCIfByVal<CCPassByVal<4, 4>>, + // The 'nest' parameter, if any, is passed in R12. + CCIfNest<CCAssignToReg<[R12]>>, + // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>, diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4175b4af86e6..fdd0763ea608 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -49,8 +49,6 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; -extern cl::opt<bool> EnableARMLongCalls; - namespace { // All possible address modes, plus some. @@ -685,7 +683,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { } unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) return 0; @@ -732,7 +730,7 @@ unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) { } bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); + EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; @@ -786,12 +784,13 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { return ARMComputeAddress(U->getOperand(0), Addr); case Instruction::IntToPtr: // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return ARMComputeAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. 
- if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return ARMComputeAddress(U->getOperand(0), Addr); break; case Instruction::GetElementPtr: { @@ -1365,7 +1364,7 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) { bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt) { Type *Ty = Src1Value->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); + EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); @@ -1557,7 +1556,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { return false; Value *Src = I->getOperand(0); - EVT SrcEVT = TLI.getValueType(Src->getType(), true); + EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); @@ -1750,7 +1749,7 @@ bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) { } bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); // We can get here in the case when we have a binary operation on a non-legal // type and the target independent selector doesn't know how to handle it. @@ -1790,7 +1789,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { } bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { - EVT FPVT = TLI.getValueType(I->getType(), true); + EVT FPVT = TLI.getValueType(DL, I->getType(), true); if (!FPVT.isSimple()) return false; MVT VT = FPVT.getSimpleVT(); @@ -2095,7 +2094,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { CallingConv::ID CC = F.getCallingConv(); if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; @@ -2122,7 +2121,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; MVT RVVT = RVEVT.getSimpleVT(); MVT DestVT = VA.getValVT(); @@ -2173,7 +2172,7 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { unsigned ARMFastISel::getLibcallReg(const Twine &Name) { // Manually compute the global's type to avoid building it when unnecessary. Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0); - EVT LCREVT = TLI.getValueType(GVTy); + EVT LCREVT = TLI.getValueType(DL, GVTy); if (!LCREVT.isSimple()) return 0; GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false, @@ -2246,19 +2245,19 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { return false; unsigned CalleeReg = 0; - if (EnableARMLongCalls) { + if (Subtarget->genLongCalls()) { CalleeReg = getLibcallReg(TLI.getLibcallName(Call)); if (CalleeReg == 0) return false; } // Issue the call. - unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls); + unsigned CallOpc = ARMSelectCallOp(Subtarget->genLongCalls()); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); // BL / BLX don't take a predicate, but tBL / tBLX do. 
if (isThumb2) AddDefaultPred(MIB); - if (EnableARMLongCalls) + if (Subtarget->genLongCalls()) MIB.addReg(CalleeReg); else MIB.addExternalSymbol(TLI.getLibcallName(Call)); @@ -2380,7 +2379,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, bool UseReg = false; const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); - if (!GV || EnableARMLongCalls) UseReg = true; + if (!GV || Subtarget->genLongCalls()) UseReg = true; unsigned CalleeReg = 0; if (UseReg) { @@ -2576,8 +2575,8 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) { Value *Op = I->getOperand(0); EVT SrcVT, DestVT; - SrcVT = TLI.getValueType(Op->getType(), true); - DestVT = TLI.getValueType(I->getType(), true); + SrcVT = TLI.getValueType(DL, Op->getType(), true); + DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; @@ -2742,8 +2741,8 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { if (!SrcReg) return false; EVT SrcEVT, DestEVT; - SrcEVT = TLI.getValueType(SrcTy, true); - DestEVT = TLI.getValueType(DestTy, true); + SrcEVT = TLI.getValueType(DL, SrcTy, true); + DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) return false; @@ -2763,7 +2762,7 @@ bool ARMFastISel::SelectShift(const Instruction *I, return false; // Only handle i32 now. - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (DestVT != MVT::i32) return false; @@ -3026,7 +3025,7 @@ bool ARMFastISel::fastLowerArguments() { if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; - EVT ArgVT = TLI.getValueType(ArgTy); + EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { case MVT::i8: diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index a52e49780e27..6744000afe2b 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -800,7 +800,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // This is bad, if an interrupt is taken after the mov, sp is in an // inconsistent state. // Use the first callee-saved register as a scratch register. - assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + assert(!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, ARMCC::AL, 0, TII); @@ -1470,7 +1470,8 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, // callee-saved vector registers after realigning the stack. The vst1 and vld1 // instructions take alignment hints that can improve performance. // -static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { +static void +checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) { MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0); if (!SpillAlignedNEONRegs) return; @@ -1497,10 +1498,9 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { // callee-saved registers in order, but it can happen that there are holes in // the range. Registers above the hole will be spilled to the standard DPRCS // area. - MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned NumSpills = 0; for (; NumSpills < 8; ++NumSpills) - if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills)) + if (!SavedRegs.test(ARM::D8 + NumSpills)) break; // Don't do this for just one d-register. It's not worth it. 
@@ -1511,12 +1511,13 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills); // A scratch register is required for the vst1 / vld1 instructions. - MF.getRegInfo().setPhysRegUsed(ARM::R4); + SavedRegs.set(ARM::R4); } -void -ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); // This tells PEI to spill the FP as if it is any other callee-save register // to take advantage the eliminateFrameIndex machinery. This also ensures it // is spilled in the order specified by getCalleeSavedRegs() to make it easier @@ -1543,12 +1544,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. if (AFI->isThumb2Function() && (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) - MRI.setPhysRegUsed(ARM::R4); + SavedRegs.set(ARM::R4); if (AFI->isThumb1OnlyFunction()) { // Spill LR if Thumb1 function uses variable length argument lists. if (AFI->getArgRegsSaveSize() > 0) - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know // for sure what the stack size will be, but for this, an estimate is good @@ -1558,23 +1559,23 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // FIXME: It will be better just to find spare register here. unsigned StackSize = MFI->estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) - MRI.setPhysRegUsed(ARM::R4); + SavedRegs.set(ARM::R4); } // See if we can spill vector registers to aligned stack. - checkNumAlignedDPRCS2Regs(MF); + checkNumAlignedDPRCS2Regs(MF, SavedRegs); // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) - MRI.setPhysRegUsed(RegInfo->getBaseRegister()); + SavedRegs.set(RegInfo->getBaseRegister()); // Don't spill FP if the frame can be eliminated. This is determined - // by scanning the callee-save registers to see if any is used. + // by scanning the callee-save registers to see if any is modified. const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; - if (MRI.isPhysRegUsed(Reg)) { + if (SavedRegs.test(Reg)) { Spilled = true; CanEliminateFrame = false; } @@ -1668,7 +1669,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. // Spill LR as well so we can fold BX_RET to the registers restore (LDM). 
if (!LRSpilled && CS1Spilled) { - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); NumGPRSpills++; SmallVectorImpl<unsigned>::iterator LRPos; LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), @@ -1681,7 +1682,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (hasFP(MF)) { - MRI.setPhysRegUsed(FramePtr); + SavedRegs.set(FramePtr); auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), FramePtr); if (FPPos != UnspilledCS1GPRs.end()) @@ -1700,7 +1701,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill high register if the function is thumb if (!AFI->isThumbFunction() || isARMLowRegister(Reg) || Reg == ARM::LR) { - MRI.setPhysRegUsed(Reg); + SavedRegs.set(Reg); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; break; @@ -1708,7 +1709,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); - MRI.setPhysRegUsed(Reg); + SavedRegs.set(Reg); if (!MRI.isReserved(Reg)) ExtraCSSpill = true; } @@ -1747,7 +1748,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (Extras.size() && NumExtras == 0) { for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - MRI.setPhysRegUsed(Extras[i]); + SavedRegs.set(Extras[i]); } } else if (!AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot @@ -1761,7 +1762,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (ForceLRSpill) { - MRI.setPhysRegUsed(ARM::LR); + SavedRegs.set(ARM::LR); AFI->setLRIsSpilledForFarJump(true); } } diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index d763d17a506f..6fdc5eff5e47 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -54,8 +54,8 @@ public: unsigned &FrameReg, int SPAdj) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 50afb192b331..b110628a0a86 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -533,7 +533,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } @@ -556,7 +557,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -702,7 +704,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -722,7 +725,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -900,7 +904,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } Offset = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), @@ -915,7 +920,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -964,7 +970,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -981,7 +988,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } ARM_AM::AddrOpc AddSub = ARM_AM::add; @@ -1215,7 +1223,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, MachineFrameInfo *MFI = MF->getFrameInfo(); if (MFI->getObjectAlignment(FI) < 4) MFI->setObjectAlignment(FI, 4); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); 
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } @@ -1237,7 +1246,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, MachineFrameInfo *MFI = MF->getFrameInfo(); if (MFI->getObjectAlignment(FI) < 4) MFI->setObjectAlignment(FI, 4); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -1285,7 +1295,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); return true; } @@ -1314,7 +1325,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -1343,7 +1355,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); return true; @@ -1438,7 +1451,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); } OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); @@ -2510,7 +2524,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (UseCP) { SDValue CPIdx = CurDAG->getTargetConstantPool( ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), - TLI->getPointerTy()); + TLI->getPointerTy(CurDAG->getDataLayout())); SDNode *ResNode; if (Subtarget->isThumb()) { @@ -2540,7 +2554,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::FrameIndex: { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex( + FI, TLI->getPointerTy(CurDAG->getDataLayout())); if (Subtarget->isThumb1Only()) { // Set the alignment of the frame object to 4, to avoid having to generate // more than one ADD diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 4b2105b7442f..e335784f6d87 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -60,11 +60,6 @@ STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); -cl::opt<bool> -EnableARMLongCalls("arm-long-calls", cl::Hidden, - cl::desc("Generate calls via indirect call instructions"), - cl::init(false)); - static cl::opt<bool> ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), @@ -548,6 +543,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v4i16, Custom); setOperationAction(ISD::CTPOP, MVT::v8i16, Custom); + // NEON does not have single instruction CTTZ for vectors. + setOperationAction(ISD::CTTZ, MVT::v8i8, Custom); + setOperationAction(ISD::CTTZ, MVT::v4i16, Custom); + setOperationAction(ISD::CTTZ, MVT::v2i32, Custom); + setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); + + setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); + setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); + setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); + setOperationAction(ISD::CTTZ, MVT::v2i64, Custom); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); + // NEON only has FMA instructions as of VFP4. 
if (!Subtarget->hasVFP4()) { setOperationAction(ISD::FMA, MVT::v2f32, Expand); @@ -1149,8 +1165,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } -EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - if (!VT.isVector()) return getPointerTy(); +EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const { + if (!VT.isVector()) + return getPointerTy(DL); return VT.changeVectorElementTypeToInteger(); } @@ -1429,7 +1447,8 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + StackPtr, PtrOff); return DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(LocMemOffset), false, false, 0); @@ -1453,7 +1472,8 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, else { assert(NextVA.isMemLoc()); if (!StackPtr.getNode()) - StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, + getPointerTy(DAG.getDataLayout())); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, @@ -1526,7 +1546,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), dl); - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + SDValue StackPtr = + DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout())); RegsToPassVector RegsToPass; SmallVector<SDValue, 8> MemOpChains; @@ -1607,7 +1628,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned RegBegin, RegEnd; CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); unsigned int i, j; for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); @@ -1628,12 +1650,12 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (Flags.getByValSize() > 4*offset) { + auto PtrVT = getPointerTy(DAG.getDataLayout()); unsigned LocMemOffset = VA.getLocMemOffset(); SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, - StkPtrOff); + SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff); SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); - SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); + SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, MVT::i32); SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl, @@ -1693,8 +1715,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isARMFunc = false; bool isLocalARMFunc = false; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + auto PtrVt = getPointerTy(DAG.getDataLayout()); - if (EnableARMLongCalls) { + if (Subtarget->genLongCalls()) { assert((Subtarget->isTargetWindows() || getTargetMachine().getRelocationModel() == Reloc::Static) && "long-calls with non-static relocation model!"); @@ -1709,12 +1732,11 @@ 
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); // Get the address of the callee into a register - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), false, false, + false, 0); } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); @@ -1724,29 +1746,28 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 0); // Get the address of the callee into a register - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), false, false, + false, 0); } } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = G->getGlobal(); isDirect = true; - bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); - bool isStub = (isExt && Subtarget->isTargetMachO()) && + bool isDef = GV->isStrongDefinitionForLinker(); + bool isStub = (!isDef && Subtarget->isTargetMachO()) && getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // ARM call to a local ARM function is predicable. - isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); + isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking); // tBX takes a register source operand. if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); - Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), - DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), - 0, ARMII::MO_NONLAZY)); - Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, + Callee = DAG.getNode( + ARMISD::WrapperPIC, dl, PtrVt, + DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), false, false, true, 0); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && @@ -1754,20 +1775,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned TargetFlags = GV->hasDLLImportStorageClass() ? 
ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG; - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0, - TargetFlags); + Callee = + DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0, TargetFlags); if (GV->hasDLLImportStorageClass()) - Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(), - Callee), MachinePointerInfo::getGOT(), - false, false, false, 0); + Callee = + DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), + MachinePointerInfo::getGOT(), false, false, false, 0); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; if (Subtarget->isTargetELF() && getTargetMachine().getRelocationModel() == Reloc::PIC_) OpFlags = ARMII::MO_PLT; - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); + Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; @@ -1781,22 +1802,20 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, ARMPCLabelIndex, 4); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), false, false, + false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); - Callee = DAG.getNode(ARMISD::PIC_ADD, dl, - getPointerTy(), Callee, PICLabel); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); } else { unsigned OpFlags = 0; // On ELF targets for PIC code, direct calls should go through the PLT if (Subtarget->isTargetELF() && getTargetMachine().getRelocationModel() == Reloc::PIC_) OpFlags = ARMII::MO_PLT; - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); + Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, OpFlags); } } @@ -2433,7 +2452,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; SDLoc DL(Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; @@ -2462,7 +2481,7 @@ SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const { SDLoc dl(GA); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -2508,7 +2527,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDLoc dl(GA); SDValue Offset; SDValue Chain = DAG.getEntryNode(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Get the Thread Pointer SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -2574,7 +2593,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { @@ -2617,7 +2636,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); @@ -2648,7 +2667,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); const ARMII::TOF TargetFlags = (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; SDLoc DL(Op); @@ -2672,7 +2691,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDLoc dl(Op); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = @@ -2716,14 +2735,14 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); } case Intrinsic::arm_thread_pointer: { - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); } case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; unsigned PCAdj = (RelocM != Reloc::PIC_) @@ -2820,7 +2839,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDLoc dl(Op); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), @@ -2850,7 +2869,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); // Create load node to retrieve arguments from the stack. 
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -2904,8 +2923,9 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, if (REnd != RBegin) ArgOffset = -4 * (ARM::R4 - RBegin); + auto PtrVT = getPointerTy(DAG.getDataLayout()); int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false); - SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT); SmallVector<SDValue, 4> MemOps; const TargetRegisterClass *RC = @@ -2918,8 +2938,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(OrigArg, 4 * i), false, false, 0); MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getConstant(4, dl, getPointerTy())); + FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT)); } if (!MemOps.empty()) @@ -3013,6 +3032,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin); AFI->setArgRegsSaveSize(TotalArgRegsSaveSize); + auto PtrVT = getPointerTy(DAG.getDataLayout()); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -3035,7 +3055,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue ArgValue2; if (VA.isMemLoc()) { int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -3122,7 +3142,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg, CurByValIndex, VA.getLocMemOffset(), Flags.getByValSize()); - InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); + InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); CCInfo.nextInRegsParam(); } else { unsigned FIOffset = VA.getLocMemOffset(); @@ -3130,7 +3150,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, FIOffset, true); // Create load nodes to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0)); @@ -3855,7 +3875,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Index = Op.getOperand(2); SDLoc dl(Op); - EVT PTy = getPointerTy(); + EVT PTy = getPointerTy(DAG.getDataLayout()); JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); @@ -4102,8 +4122,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned ARMTargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { unsigned Reg = StringSwitch<unsigned>(RegName) .Case("sp", ARM::SP) .Default(0); @@ -4163,7 +4183,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { SDValue Cvt; - if (TLI.isBigEndian() && SrcVT.isVector() && + if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() && SrcVT.getVectorNumElements() > 1) Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), @@ -4283,8 +4303,82 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { - EVT VT = N->getValueType(0); SDLoc dl(N); + EVT VT = N->getValueType(0); + if (VT.isVector()) { + assert(ST->hasNEON()); + + // Compute the least significant set bit: LSB = X & -X + SDValue X = N->getOperand(0); + SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X); + SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX); + + EVT ElemTy = VT.getVectorElementType(); + + if (ElemTy == MVT::i8) { + // Compute with: cttz(x) = ctpop(lsb - 1) + SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(1, dl, ElemTy)); + SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); + return DAG.getNode(ISD::CTPOP, dl, VT, Bits); + } + + if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) && + (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) { + // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0 + unsigned NumBits = ElemTy.getSizeInBits(); + SDValue WidthMinus1 = + DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(NumBits - 1, dl, ElemTy)); + SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB); + return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ); + } + + // Compute with: cttz(x) = ctpop(lsb - 1) + + // Since we can only compute the number of bits in a byte with vcnt.8, we + // have to gather the result with pairwise addition (vpaddl) for i16, i32, + // and i64. + + // Compute LSB - 1. + SDValue Bits; + if (ElemTy == MVT::i64) { + // Load constant 0xffff'ffff'ffff'ffff to register. + SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(0x1eff, dl, MVT::i32)); + Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF); + } else { + SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT, + DAG.getTargetConstant(1, dl, ElemTy)); + Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One); + } + + // Count #bits with vcnt.8. + EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; + SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits); + SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8); + + // Gather the #bits with vpaddl (pairwise add.) + EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; + SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit, + DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), + Cnt8); + if (ElemTy == MVT::i16) + return Cnt16; + + EVT VT32Bit = VT.is64BitVector() ? 
MVT::v2i32 : MVT::v4i32; + SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit, + DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), + Cnt16); + if (ElemTy == MVT::i32) + return Cnt32; + + assert(ElemTy == MVT::i64); + SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32), + Cnt32); + return Cnt64; + } if (!ST->hasV6T2Ops()) return SDValue(); @@ -4730,7 +4824,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, ImmMask <<= 1; } - if (DAG.getTargetLoweringInfo().isBigEndian()) + if (DAG.getDataLayout().isBigEndian()) // swap higher and lower 32 bit word Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); @@ -5868,7 +5962,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, if (BVN->getValueType(0) != MVT::v4i32 || BVN->getOpcode() != ISD::BUILD_VECTOR) return false; - unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; + unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; unsigned HiElt = 1 - LoElt; ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); @@ -6013,7 +6107,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { SDNode *BVN = N->getOperand(0).getNode(); assert(BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); - unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; + unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0; return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); } @@ -6342,18 +6436,19 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { SDValue Arg = Op.getOperand(0); EVT ArgVT = Arg.getValueType(); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + auto PtrVT = getPointerTy(DAG.getDataLayout()); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); // Create stack object for sret. - const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy); - const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy); + auto &DL = DAG.getDataLayout(); + const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); + const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy()); + SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); ArgListTy Args; ArgListEntry Entry; @@ -6373,7 +6468,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { const char *LibcallName = (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; - SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) @@ -6387,7 +6482,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(), false, false, false, 0); // Address of cos field. 
- SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet, + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet, DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl)); SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo(), false, false, false, 0); @@ -6487,7 +6582,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); - case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget); case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); case ISD::SETCC: return LowerVSETCC(Op, DAG); case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); @@ -6845,9 +6941,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. - unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); + unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getDataLayout()->getTypeAllocSize(C->getType()); + Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); @@ -6935,9 +7031,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. - unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); + unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getDataLayout()->getTypeAllocSize(C->getType()); + Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); unsigned VReg1 = MRI->createVirtualRegister(TRC); @@ -7313,9 +7409,9 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // MachineConstantPool wants an explicit alignment. - unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); + unsigned Align = MF->getDataLayout().getPrefTypeAlignment(Int32Ty); if (Align == 0) - Align = getDataLayout()->getTypeAllocSize(C->getType()); + Align = MF->getDataLayout().getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); if (IsThumb1) @@ -8001,7 +8097,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, // Build operand list. SmallVector<SDValue, 8> Ops; Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl, - TLI.getPointerTy())); + TLI.getPointerTy(DAG.getDataLayout()))); // Input is the vector. Ops.push_back(Vec); @@ -8681,7 +8777,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); - if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) + if (DCI.DAG.getDataLayout().isBigEndian()) std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); return Result; @@ -9312,7 +9408,9 @@ static SDValue PerformSTORECombine(SDNode *N, SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i < NumElems; ++i) - ShuffleVec[i] = TLI.isBigEndian() ? 
(i+1) * SizeRatio - 1 : i * SizeRatio; + ShuffleVec[i] = DAG.getDataLayout().isBigEndian() + ? (i + 1) * SizeRatio - 1 + : i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); @@ -9339,8 +9437,8 @@ static SDValue PerformSTORECombine(SDNode *N, assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); SmallVector<SDValue, 8> Chains; - SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, DL, - TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL, + TLI.getPointerTy(DAG.getDataLayout())); SDValue BasePtr = St->getBasePtr(); // Perform one or more big stores into memory. @@ -9367,7 +9465,7 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; - bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, @@ -10078,7 +10176,7 @@ bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // For any little-endian targets with neon, we can support unaligned ld/st // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8. // A big-endian target may also explicitly support unaligned accesses - if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) { + if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) { if (Fast) *Fast = true; return true; @@ -10317,10 +10415,10 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { - EVT VT = getValueType(Ty, true); + EVT VT = getValueType(DL, Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; @@ -10664,7 +10762,7 @@ bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. 
ARMTargetLowering::ConstraintType -ARMTargetLowering::getConstraintType(const std::string &Constraint) const { +ARMTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -10723,10 +10821,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight( } typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; -RCPair -ARMTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const { +RCPair ARMTargetLowering::getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { @@ -10974,7 +11070,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { } SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), - getPointerTy()); + getPointerTy(DAG.getDataLayout())); Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); @@ -11083,7 +11179,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::arm_neon_vld4lane: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. - uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); + uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; @@ -11103,12 +11200,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::arm_neon_vst4lane: { Info.opc = ISD::INTRINSIC_VOID; // Conservatively set memVT to the entire set of vectors stored. 
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); unsigned NumElts = 0; for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeAllocSize(ArgTy) / 8; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); @@ -11122,12 +11220,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } case Intrinsic::arm_ldaex: case Intrinsic::arm_ldrex: { + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = true; Info.writeMem = false; @@ -11135,12 +11234,13 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } case Intrinsic::arm_stlex: case Intrinsic::arm_strex: { + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType()); Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::getVT(PtrTy->getElementType()); Info.ptrVal = I.getArgOperand(1); Info.offset = 0; - Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); Info.vol = true; Info.readMem = false; Info.writeMem = true; @@ -11427,9 +11527,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( VectorType *VecTy = Shuffles[0]->getType(); Type *EltTy = VecTy->getVectorElementType(); - const DataLayout *DL = getDataLayout(); - unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy); - bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64; + const DataLayout &DL = LI->getModule()->getDataLayout(); + unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); + bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't // support i64/f64 element). @@ -11439,8 +11539,8 @@ bool ARMTargetLowering::lowerInterleavedLoad( // A pointer vector can not be the return type of the ldN intrinsics. Need to // load integer vectors first and then convert to pointer vectors. if (EltTy->isPointerTy()) - VecTy = VectorType::get(DL->getIntPtrType(EltTy), - VecTy->getVectorNumElements()); + VecTy = + VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements()); static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2, Intrinsic::arm_neon_vld3, @@ -11517,9 +11617,9 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, Type *EltTy = VecTy->getVectorElementType(); VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); - const DataLayout *DL = getDataLayout(); - unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy); - bool EltIs64Bits = DL->getTypeAllocSizeInBits(EltTy) == 64; + const DataLayout &DL = SI->getModule()->getDataLayout(); + unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; // Skip illegal sub vector types and vector types of i64/f64 element (vstN // doesn't support i64/f64 element). 
@@ -11533,7 +11633,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, // StN intrinsics don't support pointer vectors as arguments. Convert pointer // vectors to integer vectors. if (EltTy->isPointerTy()) { - Type *IntTy = DL->getIntPtrType(EltTy); + Type *IntTy = DL.getIntPtrType(EltTy); // Convert to the corresponding integer vector. Type *IntVecTy = diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 74396392f8e3..efc9020c193a 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -249,7 +249,8 @@ namespace llvm { } /// getSetCCResultType - Return the value type to use for ISD::SETCC. - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, @@ -286,8 +287,8 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -324,8 +325,7 @@ namespace llvm { bool ExpandInlineAsm(CallInst *CI) const override; - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. @@ -334,8 +334,7 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -345,8 +344,8 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "Q") return InlineAsm::Constraint_Q; else if (ConstraintCode.size() == 2) { @@ -533,7 +532,8 @@ namespace llvm { SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. 
fmuladd intrinsics will be diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b8cac135baf6..61c45af26fe1 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -306,8 +306,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">; -def IsBE : Predicate<"getTargetLowering()->isBigEndian()">; +def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">; +def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 245c9e869bf6..37352810c99f 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -31,11 +31,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -65,12 +67,18 @@ namespace { static char ID; ARMLoadStoreOpt() : MachineFunctionPass(ID) {} + const MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; const ARMSubtarget *STI; const TargetLowering *TL; ARMFunctionInfo *AFI; - RegScavenger *RS; + LivePhysRegs LiveRegs; + RegisterClassInfo RegClassInfo; + MachineBasicBlock::const_iterator LiveRegPos; + bool LiveRegsValid; + bool RegClassInfoValid; bool isThumb1, isThumb2; bool runOnMachineFunction(MachineFunction &Fn) override; @@ -80,64 +88,60 @@ namespace { } private: + /// A set of load/store MachineInstrs with same base register sorted by + /// offset. struct MemOpQueueEntry { - int Offset; - unsigned Reg; - bool isKill; - unsigned Position; - MachineBasicBlock::iterator MBBI; - bool Merged; - MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, - MachineBasicBlock::iterator i) - : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} + MachineInstr *MI; + int Offset; ///< Load/Store offset. + unsigned Position; ///< Position as counted from end of basic block. + MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position) + : MI(MI), Offset(Offset), Position(Position) {} }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; - typedef MemOpQueue::iterator MemOpQueueIter; - void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, - const MemOpQueue &MemOps, unsigned DefReg, - unsigned RangeBegin, unsigned RangeEnd); + /// A set of MachineInstrs that fulfill (nearly all) conditions to get + /// merged into a LDM/STM. + struct MergeCandidate { + /// List of instructions ordered by load/store offset. + SmallVector<MachineInstr*, 4> Instrs; + /// Index in Instrs of the instruction being latest in the schedule. + unsigned LatestMIIdx; + /// Index in Instrs of the instruction being earliest in the schedule. 
+ unsigned EarliestMIIdx; + /// Index into the basic block where the merged instruction will be + /// inserted. (See MemOpQueueEntry.Position) + unsigned InsertPos; + /// Whether the instructions can be merged into a ldm/stm instruction. + bool CanMergeToLSMulti; + /// Whether the instructions can be merged into a ldrd/strd instruction. + bool CanMergeToLSDouble; + }; + SpecificBumpPtrAllocator<MergeCandidate> Allocator; + SmallVector<const MergeCandidate*,4> Candidates; + SmallVector<MachineInstr*,4> MergeBaseCandidates; + + void moveLiveRegsBefore(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator Before); + unsigned findFreeReg(const TargetRegisterClass &RegClass); void UpdateBaseRegUses(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc dl, unsigned Base, unsigned WordOffset, + DebugLoc DL, unsigned Base, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg); - bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - int Offset, unsigned Base, bool BaseKill, unsigned Opcode, - ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, - DebugLoc dl, - ArrayRef<std::pair<unsigned, bool> > Regs, - ArrayRef<unsigned> ImpDefs); - void MergeOpsUpdate(MachineBasicBlock &MBB, - MemOpQueue &MemOps, - unsigned memOpsBegin, - unsigned memOpsEnd, - unsigned insertAfter, - int Offset, - unsigned Base, - bool BaseKill, - unsigned Opcode, - ARMCC::CondCodes Pred, - unsigned PredReg, - unsigned Scratch, - DebugLoc dl, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges); - void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, - unsigned Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges); - void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); + MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs); + MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const; + void FormCandidates(const MemOpQueue &MemOps); + MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); - bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const TargetInstrInfo *TII, - bool &Advance, - MachineBasicBlock::iterator &I); - bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - bool &Advance, - MachineBasicBlock::iterator &I); + bool MergeBaseUpdateLoadStore(MachineInstr *MI); + bool MergeBaseUpdateLSMultiple(MachineInstr *MI); + bool MergeBaseUpdateLSDouble(MachineInstr &MI) const; bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -185,6 +189,14 @@ static int getMemoryOpOffset(const MachineInstr *MI) { return Offset; } +static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) { + return MI.getOperand(1); +} + +static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) { + return MI.getOperand(0); +} + static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode 
Mode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); @@ -348,6 +360,10 @@ static bool isi32Store(unsigned Opc) { return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); } +static bool isLoadSingle(unsigned Opc) { + return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; +} + static unsigned getImmScale(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Unhandled opcode!"); @@ -365,12 +381,55 @@ static unsigned getImmScale(unsigned Opc) { } } +static unsigned getLSMultipleTransferSize(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return 0; + case ARM::LDRi12: + case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: + case ARM::tLDRspi: + case ARM::tSTRspi: + case ARM::t2LDRi8: + case ARM::t2LDRi12: + case ARM::t2STRi8: + case ARM::t2STRi12: + case ARM::VLDRS: + case ARM::VSTRS: + return 4; + case ARM::VLDRD: + case ARM::VSTRD: + return 8; + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: + case ARM::t2LDMIA: + case ARM::t2LDMDB: + case ARM::t2STMIA: + case ARM::t2STMDB: + case ARM::VLDMSIA: + case ARM::VSTMSIA: + return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; + case ARM::VLDMDIA: + case ARM::VSTMDIA: + return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; + } +} + /// Update future uses of the base register with the offset introduced /// due to writeback. This function only works on Thumb1. void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc dl, unsigned Base, + DebugLoc DL, unsigned Base, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg) { assert(isThumb1 && "Can only update base register uses for Thumb1!"); @@ -398,7 +457,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, Offset = MO.getImm() - WordOffset * getImmScale(Opc); // If storing the base register, it needs to be reset first. - unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); + unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg(); if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) MO.setImm(Offset); @@ -439,7 +498,7 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, if (InsertSub) { // An instruction above couldn't be updated, so insert a sub. - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); return; } @@ -457,31 +516,65 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, // See PR21029. if (MBBI != MBB.end()) --MBBI; AddDefaultT1CC( - BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true) .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg); } } +/// Return the first register of class \p RegClass that is not in \p Regs. +unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) { + if (!RegClassInfoValid) { + RegClassInfo.runOnMachineFunction(*MF); + RegClassInfoValid = true; + } + + for (unsigned Reg : RegClassInfo.getOrder(&RegClass)) + if (!LiveRegs.contains(Reg)) + return Reg; + return 0; +} + +/// Compute live registers just before instruction \p Before (in normal schedule +/// direction). 
Computes backwards so multiple queries in the same block must +/// come in reverse order. +void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB, + MachineBasicBlock::const_iterator Before) { + // Initialize if we never queried in this block. + if (!LiveRegsValid) { + LiveRegs.init(TRI); + LiveRegs.addLiveOuts(&MBB, true); + LiveRegPos = MBB.end(); + LiveRegsValid = true; + } + // Move backward just before the "Before" position. + while (LiveRegPos != Before) { + --LiveRegPos; + LiveRegs.stepBackward(*LiveRegPos); + } +} + +static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs, + unsigned Reg) { + for (const std::pair<unsigned, bool> &R : Regs) + if (R.first == Reg) + return true; + return false; +} + /// Create and insert a LDM or STM with Base as base register and registers in /// Regs as the register operands that would be loaded / stored. It returns /// true if the transformation is done. -bool -ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - int Offset, unsigned Base, bool BaseKill, - unsigned Opcode, ARMCC::CondCodes Pred, - unsigned PredReg, unsigned Scratch, DebugLoc dl, - ArrayRef<std::pair<unsigned, bool> > Regs, - ArrayRef<unsigned> ImpDefs) { - // Only a single register to load / store. Don't bother. +MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) { unsigned NumRegs = Regs.size(); - if (NumRegs <= 1) - return false; + assert(NumRegs > 1); // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. // Compute liveness information for that register to make the decision. bool SafeToClobberCPSR = !isThumb1 || - (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == + (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) == MachineBasicBlock::LQR_Dead); bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. @@ -489,17 +582,14 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // Exception: If the base register is in the input reglist, Thumb1 LDM is // non-writeback. // It's also not possible to merge an STR of the base register in Thumb1. - if (isThumb1) - for (const std::pair<unsigned, bool> &R : Regs) - if (Base == R.first) { - assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); - if (Opcode == ARM::tLDRi) { - Writeback = false; - break; - } else if (Opcode == ARM::tSTRi) { - return false; - } - } + if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) { + assert(Base != ARM::SP && "Thumb1 does not allow SP in register list"); + if (Opcode == ARM::tLDRi) { + Writeback = false; + } else if (Opcode == ARM::tSTRi) { + return nullptr; + } + } ARM_AM::AMSubMode Mode = ARM_AM::ia; // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. @@ -516,18 +606,18 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) { // Check if this is a supported opcode before inserting instructions to // calculate a new base register. - if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; + if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr; // If starting offset isn't zero, insert a MI to materialize a new base. // But only do so if it is cost effective, i.e. merging more than two // loads / stores. 
if (NumRegs <= 2) - return false; + return nullptr; // On Thumb1, it's not worth materializing a new base register without // clobbering the CPSR (i.e. not using ADDS/SUBS). if (!SafeToClobberCPSR) - return false; + return nullptr; unsigned NewBase; if (isi32Load(Opcode)) { @@ -535,10 +625,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // use as the new base. NewBase = Regs[NumRegs-1].first; } else { - // Use the scratch register to use as a new base. - NewBase = Scratch; + // Find a free register that we can use as scratch register. + moveLiveRegsBefore(MBB, InsertBefore); + // The merged instruction does not exist yet but will use several Regs if + // it is a Store. + if (!isLoadSingle(Opcode)) + for (const std::pair<unsigned, bool> &R : Regs) + LiveRegs.addReg(R.first); + + NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass); if (NewBase == 0) - return false; + return nullptr; } int BaseOpc = @@ -557,7 +654,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (!TL->isLegalAddImmediate(Offset)) // FIXME: Try add with register operand? - return false; // Probably not worth it then. + return nullptr; // Probably not worth it then. + + // We can only append a kill flag to the add/sub input if the value is not + // used in the register list of the stm as well. + bool KillOldBase = BaseKill && + (!isi32Store(Opcode) || !ContainsReg(Regs, Base)); if (isThumb1) { // Thumb1: depending on immediate size, use either @@ -572,43 +674,44 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, !STI->hasV6Ops()) { // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr if (Pred != ARMCC::AL) - return false; - BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVSr), NewBase) - .addReg(Base, getKillRegState(BaseKill)); + return nullptr; + BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase) + .addReg(Base, getKillRegState(KillOldBase)); } else - BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) - .addReg(Base, getKillRegState(BaseKill)) + BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(KillOldBase)) .addImm(Pred).addReg(PredReg); - // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. + // The following ADDS/SUBS becomes an update. Base = NewBase; - BaseKill = false; + KillOldBase = true; } if (BaseOpc == ARM::tADDrSPi) { assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4"); - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset/4) + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4) .addImm(Pred).addReg(PredReg); } else - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + AddDefaultT1CC( + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true) + .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) .addImm(Pred).addReg(PredReg); } else { - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset) .addImm(Pred).addReg(PredReg).addReg(0); } Base = NewBase; BaseKill = true; // New base is always killed straight away. } - bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || - Opcode == ARM::VLDRD); + bool isDef = isLoadSingle(Opcode); // Get LS multiple opcode. 
Note that for Thumb1 this might be an opcode with // base register writeback. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); - if (!Opcode) return false; + if (!Opcode) + return nullptr; // Check if a Thumb1 LDM/STM merge is safe. This is the case if: // - There is no writeback (LDM of base register), @@ -619,7 +722,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // It's safe to return here since the code to materialize a new base register // above is also conditional on SafeToClobberCPSR. if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) - return false; + return nullptr; MachineInstrBuilder MIB; @@ -628,7 +731,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // Update tLDMIA with writeback if necessary. Opcode = ARM::tLDMIA_UPD; - MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); // Thumb1: we might need to set base writeback when building the MI. MIB.addReg(Base, getDefRegState(true)) @@ -637,381 +740,257 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // The base isn't dead after a merged instruction with writeback. // Insert a sub instruction after the newly formed instruction to reset. if (!BaseKill) - UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); } else { // No writeback, simply build the MachineInstr. - MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); MIB.addReg(Base, getKillRegState(BaseKill)); } MIB.addImm(Pred).addReg(PredReg); for (const std::pair<unsigned, bool> &R : Regs) - MIB = MIB.addReg(R.first, getDefRegState(isDef) - | getKillRegState(R.second)); + MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second)); - // Add implicit defs for super-registers. - for (unsigned ImpDef : ImpDefs) - MIB.addReg(ImpDef, RegState::ImplicitDefine); - - return true; + return MIB.getInstr(); } -/// Find all instructions using a given imp-def within a range. -/// -/// We are trying to combine a range of instructions, one of which (located at -/// position RangeBegin) implicitly defines a register. The final LDM/STM will -/// be placed at RangeEnd, and so any uses of this definition between RangeStart -/// and RangeEnd must be modified to use an undefined value. -/// -/// The live range continues until we find a second definition or one of the -/// uses we find is a kill. Unfortunately MemOps is not sorted by Position, so -/// we must consider all uses and decide which are relevant in a second pass. -void ARMLoadStoreOpt::findUsesOfImpDef( - SmallVectorImpl<MachineOperand *> &UsesOfImpDefs, const MemOpQueue &MemOps, - unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd) { - std::map<unsigned, MachineOperand *> Uses; - unsigned LastLivePos = RangeEnd; - - // First we find all uses of this register with Position between RangeBegin - // and RangeEnd, any or all of these could be uses of a definition at - // RangeBegin. We also record the latest position a definition at RangeBegin - // would be considered live. - for (unsigned i = 0; i < MemOps.size(); ++i) { - MachineInstr &MI = *MemOps[i].MBBI; - unsigned MIPosition = MemOps[i].Position; - if (MIPosition <= RangeBegin || MIPosition > RangeEnd) - continue; - - // If this instruction defines the register, then any later use will be of - // that definition rather than ours. 
- if (MI.definesRegister(DefReg)) - LastLivePos = std::min(LastLivePos, MIPosition); - - MachineOperand *UseOp = MI.findRegisterUseOperand(DefReg); - if (!UseOp) - continue; - - // If this instruction kills the register then (assuming liveness is - // correct when we start) we don't need to think about anything after here. - if (UseOp->isKill()) - LastLivePos = std::min(LastLivePos, MIPosition); - - Uses[MIPosition] = UseOp; - } - - // Now we traverse the list of all uses, and append the ones that actually use - // our definition to the requested list. - for (std::map<unsigned, MachineOperand *>::iterator I = Uses.begin(), - E = Uses.end(); - I != E; ++I) { - // List is sorted by position so once we've found one out of range there - // will be no more to consider. - if (I->first > LastLivePos) - break; - UsesOfImpDefs.push_back(I->second); +MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base, + bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg, + DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const { + bool IsLoad = isi32Load(Opcode); + assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store"); + unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8; + + assert(Regs.size() == 2); + MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL, + TII->get(LoadStoreOpcode)); + if (IsLoad) { + MIB.addReg(Regs[0].first, RegState::Define) + .addReg(Regs[1].first, RegState::Define); + } else { + MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second)) + .addReg(Regs[1].first, getKillRegState(Regs[1].second)); } + MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); + return MIB.getInstr(); } /// Call MergeOps and update MemOps and merges accordingly on success. -void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, - MemOpQueue &memOps, - unsigned memOpsBegin, unsigned memOpsEnd, - unsigned insertAfter, int Offset, - unsigned Base, bool BaseKill, - unsigned Opcode, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, - DebugLoc dl, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { - // First calculate which of the registers should be killed by the merged - // instruction. - const unsigned insertPos = memOps[insertAfter].Position; - SmallSet<unsigned, 4> KilledRegs; - DenseMap<unsigned, unsigned> Killer; - for (unsigned i = 0, e = memOps.size(); i != e; ++i) { - if (i == memOpsBegin) { - i = memOpsEnd; - if (i == e) - break; - } - if (memOps[i].Position < insertPos && memOps[i].isKill) { - unsigned Reg = memOps[i].Reg; +MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { + const MachineInstr *First = Cand.Instrs.front(); + unsigned Opcode = First->getOpcode(); + bool IsLoad = isLoadSingle(Opcode); + SmallVector<std::pair<unsigned, bool>, 8> Regs; + SmallVector<unsigned, 4> ImpDefs; + DenseSet<unsigned> KilledRegs; + // Determine list of registers and list of implicit super-register defs. + for (const MachineInstr *MI : Cand.Instrs) { + const MachineOperand &MO = getLoadStoreRegOp(*MI); + unsigned Reg = MO.getReg(); + bool IsKill = MO.isKill(); + if (IsKill) KilledRegs.insert(Reg); + Regs.push_back(std::make_pair(Reg, IsKill)); + + if (IsLoad) { + // Collect any implicit defs of super-registers, after merging we can't + // be sure anymore that we properly preserved these live ranges and must + // remove these implicit operands.
+ for (const MachineOperand &MO : MI->implicit_operands()) { + if (!MO.isReg() || !MO.isDef() || MO.isDead()) + continue; + assert(MO.isImplicit()); + unsigned DefReg = MO.getReg(); + + if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end()) + continue; + // We can ignore cases where the super-reg is read and written. + if (MI->readsRegister(DefReg)) + continue; + ImpDefs.push_back(DefReg); + } } } - for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - MachineOperand &TransferOp = memOps[i].MBBI->getOperand(0); - if (TransferOp.isUse() && TransferOp.getReg() == Base) - BaseKill = false; + // Attempt the merge. + typedef MachineBasicBlock::iterator iterator; + MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; + iterator InsertBefore = std::next(iterator(LatestMI)); + MachineBasicBlock &MBB = *LatestMI->getParent(); + unsigned Offset = getMemoryOpOffset(First); + unsigned Base = getLoadStoreBaseOp(*First).getReg(); + bool BaseKill = LatestMI->killsRegister(Base); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg); + DebugLoc DL = First->getDebugLoc(); + MachineInstr *Merged = nullptr; + if (Cand.CanMergeToLSDouble) + Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill, + Opcode, Pred, PredReg, DL, Regs); + if (!Merged && Cand.CanMergeToLSMulti) + Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill, + Opcode, Pred, PredReg, DL, Regs); + if (!Merged) + return nullptr; + + // Determine earliest instruction that will get removed. We then keep an + // iterator just above it so the following erases don't invalidate it. + iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]); + bool EarliestAtBegin = false; + if (EarliestI == MBB.begin()) { + EarliestAtBegin = true; + } else { + EarliestI = std::prev(EarliestI); } - SmallVector<std::pair<unsigned, bool>, 8> Regs; - SmallVector<unsigned, 8> ImpDefs; - SmallVector<MachineOperand *, 8> UsesOfImpDefs; - for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - unsigned Reg = memOps[i].Reg; - // If we are inserting the merged operation after an operation that - // uses the same register, make sure to transfer any kill flag. - bool isKill = memOps[i].isKill || KilledRegs.count(Reg); - Regs.push_back(std::make_pair(Reg, isKill)); - - // Collect any implicit defs of super-registers. They must be preserved. - for (const MachineOperand &MO : memOps[i].MBBI->operands()) { - if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead()) - continue; - unsigned DefReg = MO.getReg(); - if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end()) - ImpDefs.push_back(DefReg); - - // There may be other uses of the definition between this instruction and - // the eventual LDM/STM position. These should be marked undef if the - // merge takes place. - findUsesOfImpDef(UsesOfImpDefs, memOps, DefReg, memOps[i].Position, - insertPos); + // Remove instructions which have been merged. + for (MachineInstr *MI : Cand.Instrs) + MBB.erase(MI); + + // Determine range between the earliest removed instruction and the new one. + if (EarliestAtBegin) + EarliestI = MBB.begin(); + else + EarliestI = std::next(EarliestI); + auto FixupRange = make_range(EarliestI, iterator(Merged)); + + if (isLoadSingle(Opcode)) { + // If the previous loads defined a super-reg, then we have to mark earlier + // operands undef; Replicate the super-reg def on the merged instruction.
+ for (MachineInstr &MI : FixupRange) { + for (unsigned &ImpDefReg : ImpDefs) { + for (MachineOperand &MO : MI.implicit_operands()) { + if (!MO.isReg() || MO.getReg() != ImpDefReg) + continue; + if (MO.readsReg()) + MO.setIsUndef(); + else if (MO.isDef()) + ImpDefReg = 0; + } + } } - } - // Try to do the merge. - MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; - ++Loc; - if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, - Pred, PredReg, Scratch, dl, Regs, ImpDefs)) - return; - - // Merge succeeded, update records. - Merges.push_back(std::prev(Loc)); - - // In gathering loads together, we may have moved the imp-def of a register - // past one of its uses. This is OK, since we know better than the rest of - // LLVM what's OK with ARM loads and stores; but we still have to adjust the - // affected uses. - for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(), - E = UsesOfImpDefs.end(); - I != E; ++I) - (*I)->setIsUndef(); - - for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - // Remove kill flags from any memops that come before insertPos. - if (Regs[i-memOpsBegin].second) { - unsigned Reg = Regs[i-memOpsBegin].first; - if (KilledRegs.count(Reg)) { - unsigned j = Killer[Reg]; - int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true); - assert(Idx >= 0 && "Cannot find killing operand"); - memOps[j].MBBI->getOperand(Idx).setIsKill(false); - memOps[j].isKill = false; + MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged); + for (unsigned ImpDef : ImpDefs) + MIB.addReg(ImpDef, RegState::ImplicitDefine); + } else { + // Remove kill flags: We are possibly storing the values later now. + assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD); + for (MachineInstr &MI : FixupRange) { + for (MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.isKill()) + continue; + if (KilledRegs.count(MO.getReg())) + MO.setIsKill(false); } - memOps[i].isKill = true; } - MBB.erase(memOps[i].MBBI); - // Update this memop to refer to the merged instruction. - // We may need to move kill flags again. - memOps[i].Merged = true; - memOps[i].MBBI = Merges.back(); - memOps[i].Position = insertPos; + assert(ImpDefs.empty()); } - // Update memOps offsets, since they may have been modified by MergeOps. - for (auto &MemOp : memOps) { - MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); - } + return Merged; } -/// Merge a number of load / store instructions into one or more load / store -/// multiple instructions. -void -ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, - unsigned Base, unsigned Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps, - SmallVectorImpl<MachineBasicBlock::iterator> &Merges) { - bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); - int Offset = MemOps[SIndex].Offset; - int SOffset = Offset; - unsigned insertAfter = SIndex; - MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; - DebugLoc dl = Loc->getDebugLoc(); - const MachineOperand &PMO = Loc->getOperand(0); - unsigned PReg = PMO.getReg(); - unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); - unsigned Count = 1; - unsigned Limit = ~0U; - bool BaseKill = false; - // vldm / vstm limit are 32 for S variants, 16 for D variants. +static bool isValidLSDoubleOffset(int Offset) { + unsigned Value = abs(Offset); + // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally + // multiplied by 4. 
+ return (Value % 4) == 0 && Value < 1024; +} - switch (Opcode) { - default: break; - case ARM::VSTRS: - Limit = 32; - break; - case ARM::VSTRD: - Limit = 16; - break; - case ARM::VLDRD: - Limit = 16; - break; - case ARM::VLDRS: - Limit = 32; - break; - } +/// Find candidates for load/store multiple merge in list of MemOpQueueEntries. +void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { + const MachineInstr *FirstMI = MemOps[0].MI; + unsigned Opcode = FirstMI->getOpcode(); + bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); + unsigned Size = getLSMultipleTransferSize(FirstMI); + + unsigned SIndex = 0; + unsigned EIndex = MemOps.size(); + do { + // Look at the first instruction. + const MachineInstr *MI = MemOps[SIndex].MI; + int Offset = MemOps[SIndex].Offset; + const MachineOperand &PMO = getLoadStoreRegOp(*MI); + unsigned PReg = PMO.getReg(); + unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); + unsigned Latest = SIndex; + unsigned Earliest = SIndex; + unsigned Count = 1; + bool CanMergeToLSDouble = + STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset); + // ARM errata 602117: LDRD with base in list may result in incorrect base + // register when interrupted or faulted. + if (STI->isCortexM3() && isi32Load(Opcode) && + PReg == getLoadStoreBaseOp(*MI).getReg()) + CanMergeToLSDouble = false; + + bool CanMergeToLSMulti = true; + // On swift vldm/vstm starting with an odd register number as that needs + // more uops than single vldrs. + if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1) + CanMergeToLSMulti = false; + + // Merge following instructions where possible. + for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) { + int NewOffset = MemOps[I].Offset; + if (NewOffset != Offset + (int)Size) + break; + const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI); + unsigned Reg = MO.getReg(); + unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); + + // See if the current load/store may be part of a multi load/store. + bool PartOfLSMulti = CanMergeToLSMulti; + if (PartOfLSMulti) { + // Cannot load from SP + if (Reg == ARM::SP) + PartOfLSMulti = false; + // Register numbers must be in ascending order. + else if (RegNum <= PRegNum) + PartOfLSMulti = false; + // For VFP / NEON load/store multiples, the registers must be + // consecutive and within the limit on the number of registers per + // instruction. + else if (!isNotVFP && RegNum != PRegNum+1) + PartOfLSMulti = false; + } + // See if the current load/store may be part of a double load/store. + bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1; - for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { - int NewOffset = MemOps[i].Offset; - const MachineOperand &MO = MemOps[i].MBBI->getOperand(0); - unsigned Reg = MO.getReg(); - unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); - // Register numbers must be in ascending order. For VFP / NEON load and - // store multiples, the registers must also be consecutive and within the - // limit on the number of registers per instruction. - if (Reg != ARM::SP && - NewOffset == Offset + (int)Size && - ((isNotVFP && RegNum > PRegNum) || - ((Count < Limit) && RegNum == PRegNum+1)) && - // On Swift we don't want vldm/vstm to start with a odd register num - // because Q register unaligned vldm/vstm need more uops. 
- (!STI->isSwift() || isNotVFP || Count != 1 || !(PRegNum & 0x1))) { + if (!PartOfLSMulti && !PartOfLSDouble) + break; + CanMergeToLSMulti &= PartOfLSMulti; + CanMergeToLSDouble &= PartOfLSDouble; + // Track MemOp with latest and earliest position (Positions are + // counted in reverse). + unsigned Position = MemOps[I].Position; + if (Position < MemOps[Latest].Position) + Latest = I; + else if (Position > MemOps[Earliest].Position) + Earliest = I; + // Prepare for next MemOp. Offset += Size; PRegNum = RegNum; - ++Count; - } else { - // Can't merge this in. Try merge the earlier ones first. - // We need to compute BaseKill here because the MemOps may have been - // reordered. - BaseKill = Loc->killsRegister(Base); - - MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, - BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); - MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, - MemOps, Merges); - return; } - if (MemOps[i].Position > MemOps[insertAfter].Position) { - insertAfter = i; - Loc = MemOps[i].MBBI; - } - } - - BaseKill = Loc->killsRegister(Base); - MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, - Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); -} - -static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg) { - unsigned MyPredReg = 0; - if (!MI) - return false; - - bool CheckCPSRDef = false; - switch (MI->getOpcode()) { - default: return false; - case ARM::tSUBi8: - case ARM::t2SUBri: - case ARM::SUBri: - CheckCPSRDef = true; - break; - case ARM::tSUBspi: - break; - } - - // Make sure the offset fits in 8 bits. - if (Bytes == 0 || (Limit && Bytes >= Limit)) - return false; - - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || - MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME - if (!(MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm() * Scale) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg)) - return false; - - return CheckCPSRDef ? !definesCPSR(MI) : true; -} - -static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg) { - unsigned MyPredReg = 0; - if (!MI) - return false; - - bool CheckCPSRDef = false; - switch (MI->getOpcode()) { - default: return false; - case ARM::tADDi8: - case ARM::t2ADDri: - case ARM::ADDri: - CheckCPSRDef = true; - break; - case ARM::tADDspi: - break; - } - - if (Bytes == 0 || (Limit && Bytes >= Limit)) - // Make sure the offset fits in 8 bits. - return false; - - unsigned Scale = (MI->getOpcode() == ARM::tADDspi || - MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME - if (!(MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm() * Scale) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg)) - return false; - - return CheckCPSRDef ? 
!definesCPSR(MI) : true; -} - -static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { - switch (MI->getOpcode()) { - default: return 0; - case ARM::LDRi12: - case ARM::STRi12: - case ARM::tLDRi: - case ARM::tSTRi: - case ARM::tLDRspi: - case ARM::tSTRspi: - case ARM::t2LDRi8: - case ARM::t2LDRi12: - case ARM::t2STRi8: - case ARM::t2STRi12: - case ARM::VLDRS: - case ARM::VSTRS: - return 4; - case ARM::VLDRD: - case ARM::VSTRD: - return 8; - case ARM::LDMIA: - case ARM::LDMDA: - case ARM::LDMDB: - case ARM::LDMIB: - case ARM::STMIA: - case ARM::STMDA: - case ARM::STMDB: - case ARM::STMIB: - case ARM::tLDMIA: - case ARM::tLDMIA_UPD: - case ARM::tSTMIA_UPD: - case ARM::t2LDMIA: - case ARM::t2LDMDB: - case ARM::t2STMIA: - case ARM::t2STMDB: - case ARM::VLDMSIA: - case ARM::VSTMSIA: - return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; - case ARM::VLDMDIA: - case ARM::VSTMDIA: - return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; - } + // Form a candidate from the Ops collected so far. + MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate; + for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C) + Candidate->Instrs.push_back(MemOps[C].MI); + Candidate->LatestMIIdx = Latest - SIndex; + Candidate->EarliestMIIdx = Earliest - SIndex; + Candidate->InsertPos = MemOps[Latest].Position; + if (Count == 1) + CanMergeToLSMulti = CanMergeToLSDouble = false; + Candidate->CanMergeToLSMulti = CanMergeToLSMulti; + Candidate->CanMergeToLSDouble = CanMergeToLSDouble; + Candidates.push_back(Candidate); + // Continue after the chain. + SIndex += Count; + } while (SIndex < EIndex); } static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, @@ -1081,6 +1060,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } } +/// Check if the given instruction increments or decrements a register and +/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags +/// generated by the instruction are possibly read as well. +static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg, + ARMCC::CondCodes Pred, unsigned PredReg) { + bool CheckCPSRDef; + int Scale; + switch (MI.getOpcode()) { + case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break; + case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break; + case ARM::t2SUBri: + case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break; + case ARM::t2ADDri: + case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break; + case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break; + case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break; + default: return 0; + } + + unsigned MIPredReg; + if (MI.getOperand(0).getReg() != Reg || + MI.getOperand(1).getReg() != Reg || + getInstrPredicate(&MI, MIPredReg) != Pred || + MIPredReg != PredReg) + return 0; + + if (CheckCPSRDef && definesCPSR(&MI)) + return 0; + return MI.getOperand(2).getImm() * Scale; +} + +/// Searches for an increment or decrement of \p Reg before \p MBBI. +static MachineBasicBlock::iterator +findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg, + ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { + Offset = 0; + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineBasicBlock::iterator BeginMBBI = MBB.begin(); + MachineBasicBlock::iterator EndMBBI = MBB.end(); + if (MBBI == BeginMBBI) + return EndMBBI; + + // Skip debug values. 
+ MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); + while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI) + --PrevMBBI; + + Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg); + return Offset == 0 ? EndMBBI : PrevMBBI; +} + +/// Searches for an increment or decrement of \p Reg after \p MBBI. +static MachineBasicBlock::iterator +findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg, + ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) { + Offset = 0; + MachineBasicBlock &MBB = *MBBI->getParent(); + MachineBasicBlock::iterator EndMBBI = MBB.end(); + MachineBasicBlock::iterator NextMBBI = std::next(MBBI); + // Skip debug values. + while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) + ++NextMBBI; + if (NextMBBI == EndMBBI) + return EndMBBI; + + Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg); + return Offset == 0 ? EndMBBI : NextMBBI; +} + /// Fold proceeding/trailing inc/dec of base register into the /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: /// @@ -1093,21 +1141,17 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, /// ldmia rn, <ra, rb, rc> /// => /// ldmdb rn!, <ra, rb, rc> -bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - bool &Advance, - MachineBasicBlock::iterator &I) { +bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // Thumb1 is already using updating loads/stores. if (isThumb1) return false; - MachineInstr *MI = MBBI; - unsigned Base = MI->getOperand(0).getReg(); - bool BaseKill = MI->getOperand(0).isKill(); - unsigned Bytes = getLSMultipleTransferSize(MI); + const MachineOperand &BaseOP = MI->getOperand(0); + unsigned Base = BaseOP.getReg(); + bool BaseKill = BaseOP.isKill(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); unsigned Opcode = MI->getOpcode(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc DL = MI->getDebugLoc(); // Can't use an updating ld/st if the base register is also a dest // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. @@ -1115,55 +1159,27 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MI->getOperand(i).getReg() == Base) return false; - bool DoMerge = false; + int Bytes = getLSMultipleTransferSize(MI); + MachineBasicBlock &MBB = *MI->getParent(); + MachineBasicBlock::iterator MBBI(MI); + int Offset; + MachineBasicBlock::iterator MergeInstr + = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); - - // Try merging with the previous instruction. - MachineBasicBlock::iterator BeginMBBI = MBB.begin(); - if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); - while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) - --PrevMBBI; - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::db; - DoMerge = true; - } else if (Mode == ARM_AM::ib && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::da; - DoMerge = true; - } - if (DoMerge) - MBB.erase(PrevMBBI); - } - - // Try merging with the next instruction.
- MachineBasicBlock::iterator EndMBBI = MBB.end(); - if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = std::next(MBBI); - while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) - ++NextMBBI; - if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && - isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } - if (DoMerge) { - if (NextMBBI == I) { - Advance = true; - ++I; - } - MBB.erase(NextMBBI); - } + if (Mode == ARM_AM::ia && Offset == -Bytes) { + Mode = ARM_AM::db; + } else if (Mode == ARM_AM::ib && Offset == -Bytes) { + Mode = ARM_AM::da; + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && + ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) + return false; } - - if (!DoMerge) - return false; + MBB.erase(MergeInstr); unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); @@ -1231,21 +1247,15 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc, /// Fold proceeding/trailing inc/dec of base register into the /// LDR/STR/FLD{D|S}/FST{D|S} op when possible: -bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - const TargetInstrInfo *TII, - bool &Advance, - MachineBasicBlock::iterator &I) { +bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { // Thumb1 doesn't have updating LDR/STR. // FIXME: Use LDM/STM with single register instead. if (isThumb1) return false; - MachineInstr *MI = MBBI; - unsigned Base = MI->getOperand(1).getReg(); - bool BaseKill = MI->getOperand(1).isKill(); - unsigned Bytes = getLSMultipleTransferSize(MI); + unsigned Base = getLoadStoreBaseOp(*MI).getReg(); + bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); unsigned Opcode = MI->getOpcode(); - DebugLoc dl = MI->getDebugLoc(); + DebugLoc DL = MI->getDebugLoc(); bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || Opcode == ARM::VSTRD || Opcode == ARM::VSTRS); bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12); @@ -1255,7 +1265,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) return false; - bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. if (MI->getOperand(0).getReg() == Base) @@ -1263,64 +1272,38 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); - bool DoMerge = false; - ARM_AM::AddrOpc AddSub = ARM_AM::add; - unsigned NewOpc = 0; - // AM2 - 12 bits, thumb2 - 8 bits. - unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); - - // Try merging with the previous instruction. 
- MachineBasicBlock::iterator BeginMBBI = MBB.begin(); - if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); - while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) - --PrevMBBI; - if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { - DoMerge = true; - AddSub = ARM_AM::sub; - } else if (!isAM5 && - isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) { - DoMerge = true; - } - if (DoMerge) { - NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub); - MBB.erase(PrevMBBI); - } - } - - // Try merging with the next instruction. - MachineBasicBlock::iterator EndMBBI = MBB.end(); - if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = std::next(MBBI); - while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) - ++NextMBBI; - if (!isAM5 && - isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { - DoMerge = true; - AddSub = ARM_AM::sub; - } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) { - DoMerge = true; - } - if (DoMerge) { - NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub); - if (NextMBBI == I) { - Advance = true; - ++I; - } - MBB.erase(NextMBBI); - } + int Bytes = getLSMultipleTransferSize(MI); + MachineBasicBlock &MBB = *MI->getParent(); + MachineBasicBlock::iterator MBBI(MI); + int Offset; + MachineBasicBlock::iterator MergeInstr + = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); + unsigned NewOpc; + if (!isAM5 && Offset == Bytes) { + NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add); + } else if (Offset == -Bytes) { + NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (Offset == Bytes) { + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add); + } else if (!isAM5 && Offset == -Bytes) { + NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub); + } else + return false; } + MBB.erase(MergeInstr); - if (!DoMerge) - return false; + ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add; + bool isLd = isLoadSingle(Opcode); if (isAM5) { // VLDM[SD]_UPD, VSTM[SD]_UPD // (There are no base-updating versions of VLDR/VSTR instructions, but the // updating load/store-multiple instructions can be used with only one // register.) MachineOperand &MO = MI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) .addReg(Base, getDefRegState(true)) // WB base register .addReg(Base, getKillRegState(isLd ? BaseKill : false)) .addImm(Pred).addReg(PredReg) @@ -1330,20 +1313,18 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (isAM2) { // LDR_PRE, LDR_POST if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) { - int Offset = AddSub == ARM_AM::sub ? 
-Bytes : Bytes; - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); } else { - int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); } } else { - int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2LDR_PRE, t2LDR_POST - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg()) .addReg(Base, RegState::Define) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); } @@ -1353,15 +1334,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, // the vestigal zero-reg offset register. When that's fixed, this clause // can be removed entirely. if (isAM2 && NewOpc == ARM::STR_POST_IMM) { - int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); // STR_PRE, STR_POST - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); } else { - int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2STR_PRE, t2STR_POST - BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg); } @@ -1371,6 +1351,66 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return true; } +bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) && + "Must have t2STRDi8 or t2LDRDi8"); + if (MI.getOperand(3).getImm() != 0) + return false; + + // Behaviour for writeback is undefined if base register is the same as one + // of the others. + const MachineOperand &BaseOp = MI.getOperand(2); + unsigned Base = BaseOp.getReg(); + const MachineOperand &Reg0Op = MI.getOperand(0); + const MachineOperand &Reg1Op = MI.getOperand(1); + if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) + return false; + + unsigned PredReg; + ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); + MachineBasicBlock::iterator MBBI(MI); + MachineBasicBlock &MBB = *MI.getParent(); + int Offset; + MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, + PredReg, Offset); + unsigned NewOpc; + if (Offset == 8 || Offset == -8) { + NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (Offset == 8 || Offset == -8) { + NewOpc = Opcode == ARM::t2LDRDi8 ? 
ARM::t2LDRD_POST : ARM::t2STRD_POST; + } else + return false; + } + MBB.erase(MergeInstr); + + DebugLoc DL = MI.getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); + if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { + MIB.addOperand(Reg0Op).addOperand(Reg1Op) + .addReg(BaseOp.getReg(), RegState::Define); + } else { + assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); + MIB.addReg(BaseOp.getReg(), RegState::Define) + .addOperand(Reg0Op).addOperand(Reg1Op); + } + MIB.addReg(BaseOp.getReg(), RegState::Kill) + .addImm(Offset).addImm(Pred).addReg(PredReg); + assert(TII->get(Opcode).getNumOperands() == 6 && + TII->get(NewOpc).getNumOperands() == 7 && + "Unexpected number of operands in Opcode specification."); + + // Transfer implicit operands. + for (const MachineOperand &MO : MI.implicit_operands()) + MIB.addOperand(MO); + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + + MBB.erase(MBBI); + return true; +} + /// Returns true if instruction is a memory operation that this pass is capable /// of operating on. static bool isMemoryOp(const MachineInstr *MI) { @@ -1426,26 +1466,10 @@ static bool isMemoryOp(const MachineInstr *MI) { return false; } -/// Advance register scavenger to just before the earliest memory op that is -/// being merged. -void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { - MachineBasicBlock::iterator Loc = MemOps[0].MBBI; - unsigned Position = MemOps[0].Position; - for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { - if (MemOps[i].Position < Position) { - Position = MemOps[i].Position; - Loc = MemOps[i].MBBI; - } - } - - if (Loc != MBB.begin()) - RS->forward(std::prev(Loc)); -} - static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, - DebugLoc dl, unsigned NewOpc, + DebugLoc DL, unsigned NewOpc, unsigned Reg, bool RegDeadKill, bool RegUndef, unsigned BaseReg, bool BaseKill, bool BaseUndef, bool OffKill, bool OffUndef, @@ -1491,7 +1515,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, if (!Errata602117 && !NonConsecutiveRegs) return false; - MachineBasicBlock::iterator NewBBI = MBBI; bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; bool EvenDeadKill = isLd ? @@ -1531,7 +1554,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); ++NumSTRD2STM; } - NewBBI = std::prev(MBBI); } else { // Split into two instructions. 
unsigned NewOpc = (isLd) @@ -1553,7 +1575,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); - NewBBI = std::prev(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1573,7 +1594,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, false, OffUndef, Pred, PredReg, TII, isT2); - NewBBI = std::prev(MBBI); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, @@ -1585,191 +1605,160 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, ++NumSTRD2STR; } - MBB.erase(MI); - MBBI = NewBBI; + MBBI = MBB.erase(MBBI); return true; } /// An optimization pass to turn multiple LDR / STR ops of the same base and /// incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { - unsigned NumMerges = 0; - unsigned NumMemOps = 0; MemOpQueue MemOps; unsigned CurrBase = 0; unsigned CurrOpc = ~0u; - unsigned CurrSize = 0; ARMCC::CondCodes CurrPred = ARMCC::AL; - unsigned CurrPredReg = 0; unsigned Position = 0; - SmallVector<MachineBasicBlock::iterator,4> Merges; - - RS->enterBasicBlock(&MBB); - MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - while (MBBI != E) { + assert(Candidates.size() == 0); + assert(MergeBaseCandidates.size() == 0); + LiveRegsValid = false; + + for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin(); + I = MBBI) { + // The instruction in front of the iterator is the one we look at. + MBBI = std::prev(I); if (FixInvalidRegPairOp(MBB, MBBI)) continue; + ++Position; - bool Advance = false; - bool TryMerge = false; - - bool isMemOp = isMemoryOp(MBBI); - if (isMemOp) { + if (isMemoryOp(MBBI)) { unsigned Opcode = MBBI->getOpcode(); - unsigned Size = getLSMultipleTransferSize(MBBI); const MachineOperand &MO = MBBI->getOperand(0); unsigned Reg = MO.getReg(); - bool isKill = MO.isDef() ? false : MO.isKill(); - unsigned Base = MBBI->getOperand(1).getReg(); + unsigned Base = getLoadStoreBaseOp(*MBBI).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); int Offset = getMemoryOpOffset(MBBI); - // Watch out for: - // r4 := ldr [r5] - // r5 := ldr [r5, #4] - // r6 := ldr [r5, #8] - // - // The second ldr has effectively broken the chain even though it - // looks like the later ldr(s) use the same base register. Try to - // merge the ldr's so far, including this one. But don't try to - // combine the following ldr(s). - bool Clobber = isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg(); - - // Watch out for: - // r4 := ldr [r0, #8] - // r4 := ldr [r0, #4] - // - // The optimization may reorder the second ldr in front of the first - // ldr, which violates write after write(WAW) dependence. The same as - // str. Try to merge inst(s) already in MemOps. - bool Overlap = false; - for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { - if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) { - Overlap = true; - break; - } - } - - if (CurrBase == 0 && !Clobber) { + if (CurrBase == 0) { // Start of a new chain. 
CurrBase = Base; CurrOpc = Opcode; - CurrSize = Size; CurrPred = Pred; - CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); - ++NumMemOps; - Advance = true; - } else if (!Overlap) { - if (Clobber) { - TryMerge = true; - Advance = true; + MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); + continue; + } + // Note: No need to match PredReg in the next if. + if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { + // Watch out for: + // r4 := ldr [r0, #8] + // r4 := ldr [r0, #4] + // or + // r0 := ldr [r0] + // If a load overrides the base register or a register loaded by + // another load in our chain, we cannot take this instruction. + bool Overlap = false; + if (isLoadSingle(Opcode)) { + Overlap = (Base == Reg); + if (!Overlap) { + for (const MemOpQueueEntry &E : MemOps) { + if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) { + Overlap = true; + break; + } + } + } } - if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { - // No need to match PredReg. - // Continue adding to the queue. + if (!Overlap) { + // Check offset and sort memory operation into the current chain. if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, - Position, MBBI)); - ++NumMemOps; - Advance = true; + MemOps.push_back(MemOpQueueEntry(MBBI, Offset, Position)); + continue; } else { - for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); - I != E; ++I) { - if (Offset < I->Offset) { - MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, - Position, MBBI)); - ++NumMemOps; - Advance = true; + MemOpQueue::iterator MI, ME; + for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) { + if (Offset < MI->Offset) { + // Found a place to insert. break; - } else if (Offset == I->Offset) { - // Collision! This can't be merged! + } + if (Offset == MI->Offset) { + // Collision, abort. + MI = ME; break; } } + if (MI != MemOps.end()) { + MemOps.insert(MI, MemOpQueueEntry(MBBI, Offset, Position)); + continue; + } } } } - } - if (MBBI->isDebugValue()) { - ++MBBI; - if (MBBI == E) - // Reach the end of the block, try merging the memory instructions. - TryMerge = true; - } else if (Advance) { - ++Position; - ++MBBI; - if (MBBI == E) - // Reach the end of the block, try merging the memory instructions. - TryMerge = true; - } else { - TryMerge = true; + // Don't advance the iterator; The op will start a new chain next. + MBBI = I; + --Position; + // Fallthrough to look into existing chain. + } else if (MBBI->isDebugValue()) { + continue; + } else if (MBBI->getOpcode() == ARM::t2LDRDi8 || + MBBI->getOpcode() == ARM::t2STRDi8) { + // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions + // remember them because we may still be able to merge add/sub into them. + MergeBaseCandidates.push_back(MBBI); } - if (TryMerge) { - if (NumMemOps > 1) { - // Try to find a free register to use as a new base in case it's needed. - // First advance to the instruction just before the start of the chain. - AdvanceRS(MBB, MemOps); - - // Find a scratch register. - unsigned Scratch = - RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); - - // Process the load / store instructions. - RS->forward(std::prev(MBBI)); - - // Merge ops. - Merges.clear(); - MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, - CurrPred, CurrPredReg, Scratch, MemOps, Merges); - - // Try folding preceding/trailing base inc/dec into the generated - // LDM/STM ops. 
- for (unsigned i = 0, e = Merges.size(); i < e; ++i) - if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) - ++NumMerges; - NumMerges += Merges.size(); - - // Try folding preceding/trailing base inc/dec into those load/store - // that were not merged to form LDM/STM ops. - for (unsigned i = 0; i != NumMemOps; ++i) - if (!MemOps[i].Merged) - if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI)) - ++NumMerges; - - // RS may be pointing to an instruction that's deleted. - RS->skipTo(std::prev(MBBI)); - } else if (NumMemOps == 1) { - // Try folding preceding/trailing base inc/dec into the single - // load/store. - if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { - ++NumMerges; - RS->forward(std::prev(MBBI)); - } - } + // If we are here then the chain is broken; Extract candidates for a merge. + if (MemOps.size() > 0) { + FormCandidates(MemOps); + // Reset for the next chain. CurrBase = 0; CurrOpc = ~0u; - CurrSize = 0; CurrPred = ARMCC::AL; - CurrPredReg = 0; - if (NumMemOps) { - MemOps.clear(); - NumMemOps = 0; - } + MemOps.clear(); + } + } + if (MemOps.size() > 0) + FormCandidates(MemOps); - // If iterator hasn't been advanced and this is not a memory op, skip it. - // It can't start a new chain anyway. - if (!Advance && !isMemOp && MBBI != E) { - ++Position; - ++MBBI; + // Sort candidates so they get processed from end to begin of the basic + // block later; This is necessary for liveness calculation. + auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) { + return M0->InsertPos < M1->InsertPos; + }; + std::sort(Candidates.begin(), Candidates.end(), LessThan); + + // Go through list of candidates and merge. + bool Changed = false; + for (const MergeCandidate *Candidate : Candidates) { + if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) { + MachineInstr *Merged = MergeOpsUpdate(*Candidate); + // Merge preceding/trailing base inc/dec into the merged op. + if (Merged) { + Changed = true; + unsigned Opcode = Merged->getOpcode(); + if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8) + MergeBaseUpdateLSDouble(*Merged); + else + MergeBaseUpdateLSMultiple(Merged); + } else { + for (MachineInstr *MI : Candidate->Instrs) { + if (MergeBaseUpdateLoadStore(MI)) + Changed = true; + } } + } else { + assert(Candidate->Instrs.size() == 1); + if (MergeBaseUpdateLoadStore(Candidate->Instrs.front())) + Changed = true; } } - return NumMerges > 0; + Candidates.clear(); + // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt. 
+ for (MachineInstr *MI : MergeBaseCandidates) + MergeBaseUpdateLSDouble(*MI); + MergeBaseCandidates.clear(); + + return Changed; } /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr") @@ -1814,12 +1803,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { } bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + MF = &Fn; STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); TL = STI->getTargetLowering(); AFI = Fn.getInfo<ARMFunctionInfo>(); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); - RS = new RegScavenger(); + MRI = &Fn.getRegInfo(); + RegClassInfoValid = false; isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; @@ -1832,7 +1823,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { Modified |= MergeReturnIntoLDM(MBB); } - delete RS; + Allocator.DestroyAll(); return Modified; } @@ -2219,7 +2210,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { continue; int Opc = MI->getOpcode(); - bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; + bool isLd = isLoadSingle(Opc); unsigned Base = MI->getOperand(1).getReg(); int Offset = getMemoryOpOffset(MI); diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index a59cf9851108..6cafbbb9f8eb 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -18,12 +18,6 @@ using namespace llvm; #define DEBUG_TYPE "arm-selectiondag-info" -ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { -} - // Emit, if possible, a specialized version of the given Libcall. Typically this // means selecting the appropriately aligned version, but we also convert memset // of 0 into memclr. 
@@ -83,7 +77,7 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI->getDataLayout()->getIntPtrType(*DAG.getContext()); + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Args.push_back(Entry); if (AEABILibcall == AEABI_MEMCLR) { @@ -121,12 +115,14 @@ EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } }; TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(LC), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], - TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee( + TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], + TLI->getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index 1db190f41e1a..289879ee1d7e 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -37,8 +37,6 @@ namespace ARM_AM { class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit ARMSelectionDAGInfo(const DataLayout &DL); - ~ARMSelectionDAGInfo(); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 55808dfb9efe..002c3e9b6291 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -112,7 +112,6 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM), - TSInfo(*TM.getDataLayout()), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so // we can query directly. @@ -172,6 +171,7 @@ void ARMSubtarget::initializeEnvironment() { AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; + GenLongCalls = false; UnsafeFPMath = false; } @@ -286,7 +286,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, if (RelocM == Reloc::Static) return false; - bool isDecl = GV->isDeclarationForLinker(); + bool isDef = GV->isStrongDefinitionForLinker(); if (!isTargetMachO()) { // Extra load is needed for all externally visible. @@ -294,34 +294,22 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, return false; return true; } else { - if (RelocM == Reloc::PIC_) { - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (!isDecl && !GV->isWeakForLinker()) - return false; - - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return true; + // If this is a strong reference to a definition, it is definitely not + // through a stub. 
+ if (isDef) + return false; + + // Unless we have a symbol with hidden visibility, we have to go through a + // normal $non_lazy_ptr stub because this symbol might be resolved late. + if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. + return true; + if (RelocM == Reloc::PIC_) { // If symbol visibility is hidden, we have a stub for common symbol // references and external declarations. - if (isDecl || GV->hasCommonLinkage()) + if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) // Hidden $non_lazy_ptr reference. return true; - - return false; - } else { - // If this is a strong reference to a definition, it is definitely not - // through a stub. - if (!isDecl && !GV->isWeakForLinker()) - return false; - - // Unless we have a symbol with hidden visibility, we have to go through a - // normal $non_lazy_ptr stub because this symbol might be resolved late. - if (!GV->hasHiddenVisibility()) // Non-hidden $non_lazy_ptr reference. - return true; } } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 9909a6a6d198..dd101df9b63d 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -206,6 +206,9 @@ protected: /// NaCl TRAP instruction is generated instead of the regular TRAP. bool UseNaClTrap; + /// Generate calls via indirect call instructions. + bool GenLongCalls; + /// Target machine allowed unsafe FP math (such as use of NEON fp) bool UnsafeFPMath; @@ -342,6 +345,7 @@ public: bool hasMPExtension() const { return HasMPExtension; } bool hasThumb2DSP() const { return Thumb2DSP; } bool useNaClTrap() const { return UseNaClTrap; } + bool genLongCalls() const { return GenLongCalls; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 6e81bd2d349d..93495d66ae70 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -80,8 +80,7 @@ computeTargetABI(const Triple &TT, StringRef CPU, // FIXME: This is duplicated code from the front end and should be unified. 
if (TT.isOSBinFormatMachO()) { if (TT.getEnvironment() == llvm::Triple::EABI || - (TT.getOS() == llvm::Triple::UnknownOS && - TT.getObjectFormat() == llvm::Triple::MachO) || + (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) || CPU.startswith("cortex-m")) { TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; } else { @@ -104,8 +103,8 @@ computeTargetABI(const Triple &TT, StringRef CPU, TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; break; default: - if (TT.getOS() == llvm::Triple::NetBSD) - TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; + if (TT.isOSNetBSD()) + TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; else TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; break; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index f4901fc24e44..2f194cf7ae06 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -61,14 +61,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND)) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second); if (Idx != -1) return LT.first * NEONFltDblTbl[Idx].Cost; } - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); + EVT SrcTy = TLI->getValueType(DL, Src); + EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) return BaseT::getCastInstrCost(Opcode, Dst, Src); @@ -282,8 +282,8 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } }; - EVT SelCondTy = TLI->getValueType(CondTy); - EVT SelValTy = TLI->getValueType(ValTy); + EVT SelCondTy = TLI->getValueType(DL, CondTy); + EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, SelCondTy.getSimpleVT(), @@ -292,7 +292,7 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, return NEONVectorSelectTbl[Idx].Cost; } - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); return LT.first; } @@ -353,7 +353,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx == -1) @@ -379,7 +379,7 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); int Idx = CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); if (Idx == -1) @@ -395,7 +395,7 @@ unsigned ARMTTIImpl::getArithmeticInstrCost( TTI::OperandValueProperties Opd2PropInfo) { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); const unsigned FunctionCallDivCost = 20; const unsigned ReciprocalDivCost = 10; @@ -468,7 +468,7 @@ 
unsigned ARMTTIImpl::getArithmeticInstrCost( unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); if (Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isDoubleTy()) { @@ -488,12 +488,12 @@ unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, assert(isa<VectorType>(VecTy) && "Expect a vector type"); // vldN/vstN doesn't support vector types of i64/f64 element. - bool EltIs64Bits = DL->getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; + bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy); + unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy); // vldN/vstN only support legal vector types of size 64 or 128 in bits. if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index f2e5db655ccf..84f256f73722 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -42,7 +42,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { public: explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. ARMTTIImpl(const ARMTTIImpl &Arg) @@ -50,18 +51,6 @@ public: ARMTTIImpl(ARMTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - ARMTTIImpl &operator=(const ARMTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - ARMTTIImpl &operator=(ARMTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index c2db74619871..f8f0eb2d4baa 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -189,9 +189,9 @@ class ARMAsmParser : public MCTargetAsmParser { return getParser().Error(L, Msg, Ranges); } - bool validatetLDMRegList(MCInst Inst, const OperandVector &Operands, + bool validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo, bool IsARPop = false); - bool validatetSTMRegList(MCInst Inst, const OperandVector &Operands, + bool validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo); int tryParseRegister(); @@ -242,6 +242,8 @@ class ARMAsmParser : public MCTargetAsmParser { bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode); + void tryConvertingToTwoOperandForm(StringRef Mnemonic, bool CarrySetting, + OperandVector &Operands); bool isThumb() const { // FIXME: Can tablegen auto-generate this? 
return STI.getFeatureBits()[ARM::ModeThumb]; @@ -5465,6 +5467,92 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, CanAcceptPredicationCode = true; } +// \brief Some Thumb instructions have two operand forms that are not +// available as three operand, convert to two operand form if possible. +// +// FIXME: We would really like to be able to tablegen'erate this. +void ARMAsmParser::tryConvertingToTwoOperandForm(StringRef Mnemonic, + bool CarrySetting, + OperandVector &Operands) { + if (Operands.size() != 6) + return; + + const auto &Op3 = static_cast<ARMOperand &>(*Operands[3]); + auto &Op4 = static_cast<ARMOperand &>(*Operands[4]); + if (!Op3.isReg() || !Op4.isReg()) + return; + + auto Op3Reg = Op3.getReg(); + auto Op4Reg = Op4.getReg(); + + // For most Thumb2 cases we just generate the 3 operand form and reduce + // it in processInstruction(), but the 3 operand form of ADD (t2ADDrr) + // won't accept SP or PC so we do the transformation here taking care + // with immediate range in the 'add sp, sp #imm' case. + auto &Op5 = static_cast<ARMOperand &>(*Operands[5]); + if (isThumbTwo()) { + if (Mnemonic != "add") + return; + bool TryTransform = Op3Reg == ARM::PC || Op4Reg == ARM::PC || + (Op5.isReg() && Op5.getReg() == ARM::PC); + if (!TryTransform) { + TryTransform = (Op3Reg == ARM::SP || Op4Reg == ARM::SP || + (Op5.isReg() && Op5.getReg() == ARM::SP)) && + !(Op3Reg == ARM::SP && Op4Reg == ARM::SP && + Op5.isImm() && !Op5.isImm0_508s4()); + } + if (!TryTransform) + return; + } else if (!isThumbOne()) + return; + + if (!(Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" || + Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" || + Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" || + Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) + return; + + // If first 2 operands of a 3 operand instruction are the same + // then transform to 2 operand version of the same instruction + // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1' + bool Transform = Op3Reg == Op4Reg; + + // For communtative operations, we might be able to transform if we swap + // Op4 and Op5. The 'ADD Rdm, SP, Rdm' form is already handled specially + // as tADDrsp. + const ARMOperand *LastOp = &Op5; + bool Swap = false; + if (!Transform && Op5.isReg() && Op3Reg == Op5.getReg() && + ((Mnemonic == "add" && Op4Reg != ARM::SP) || + Mnemonic == "and" || Mnemonic == "eor" || + Mnemonic == "adc" || Mnemonic == "orr")) { + Swap = true; + LastOp = &Op4; + Transform = true; + } + + // If both registers are the same then remove one of them from + // the operand list, with certain exceptions. + if (Transform) { + // Don't transform 'adds Rd, Rd, Rm' or 'sub{s} Rd, Rd, Rm' because the + // 2 operand forms don't exist. + if (((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub") && + LastOp->isReg()) + Transform = false; + + // Don't transform 'add/sub{s} Rd, Rd, #imm' if the immediate fits into + // 3-bits because the ARMARM says not to. + if ((Mnemonic == "add" || Mnemonic == "sub") && LastOp->isImm0_7()) + Transform = false; + } + + if (Transform) { + if (Swap) + std::swap(Op4, Op5); + Operands.erase(Operands.begin() + 3); + } +} + bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands) { // FIXME: This is all horribly hacky. 
We really need a better way to deal @@ -5838,6 +5926,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, "VFP/Neon double precision register expected"); } + tryConvertingToTwoOperandForm(Mnemonic, CarrySetting, Operands); + // Some instructions, mostly Thumb, have forms for the same mnemonic that // do and don't have a cc_out optional-def operand. With some spot-checks // of the operand list, we can figure out which variant we're trying to @@ -5901,48 +5991,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } - // If first 2 operands of a 3 operand instruction are the same - // then transform to 2 operand version of the same instruction - // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1' - // FIXME: We would really like to be able to tablegen'erate this. - if (isThumbOne() && Operands.size() == 6 && - (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" || - Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" || - Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" || - Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) { - ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); - ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]); - ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]); - - // If both registers are the same then remove one of them from - // the operand list. - if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) { - // If 3rd operand (variable Op5) is a register and the instruction is adds/sub - // then do not transform as the backend already handles this instruction - // correctly. - if (!Op5.isReg() || !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) { - Operands.erase(Operands.begin() + 3); - if (Mnemonic == "add" && !CarrySetting) { - // Special case for 'add' (not 'adds') instruction must - // remove the CCOut operand as well. - Operands.erase(Operands.begin() + 1); - } - } - } - } - - // If instruction is 'add' and first two register operands - // use SP register, then remove one of the SP registers from - // the instruction. - // FIXME: We would really like to be able to tablegen'erate this. - if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" && !CarrySetting) { - ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); - ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); - if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP && Op3.getReg() == ARM::SP) { - Operands.erase(Operands.begin() + 2); - } - } - // GNU Assembler extension (compatibility) if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); @@ -5985,8 +6033,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // return 'true' if register list contains non-low GPR registers, // 'false' otherwise. If Reg is in the register list or is HiReg, set // 'containsReg' to true. -static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg, - unsigned HiReg, bool &containsReg) { +static bool checkLowRegisterList(const MCInst &Inst, unsigned OpNo, + unsigned Reg, unsigned HiReg, + bool &containsReg) { containsReg = false; for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) { unsigned OpReg = Inst.getOperand(i).getReg(); @@ -6001,8 +6050,8 @@ static bool checkLowRegisterList(MCInst Inst, unsigned OpNo, unsigned Reg, // Check if the specified regisgter is in the register list of the inst, // starting at the indicated operand number. 
-static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) { - for (unsigned i = OpNo; i < Inst.getNumOperands(); ++i) { +static bool listContainsReg(const MCInst &Inst, unsigned OpNo, unsigned Reg) { + for (unsigned i = OpNo, e = Inst.getNumOperands(); i < e; ++i) { unsigned OpReg = Inst.getOperand(i).getReg(); if (OpReg == Reg) return true; @@ -6020,7 +6069,7 @@ static bool instIsBreakpoint(const MCInst &Inst) { } -bool ARMAsmParser::validatetLDMRegList(MCInst Inst, +bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo, bool IsARPop) { const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]); @@ -6043,7 +6092,7 @@ bool ARMAsmParser::validatetLDMRegList(MCInst Inst, return false; } -bool ARMAsmParser::validatetSTMRegList(MCInst Inst, +bool ARMAsmParser::validatetSTMRegList(const MCInst &Inst, const OperandVector &Operands, unsigned ListNo) { const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]); @@ -8167,8 +8216,16 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // If the destination and first source operand are the same, and // there's no setting of the flags, use encoding T2 instead of T3. // Note that this is only for ADD, not SUB. This mirrors the system - // 'as' behaviour. Make sure the wide encoding wasn't explicit. - if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || + // 'as' behaviour. Also take advantage of ADD being commutative. + // Make sure the wide encoding wasn't explicit. + bool Swap = false; + auto DestReg = Inst.getOperand(0).getReg(); + bool Transform = DestReg == Inst.getOperand(1).getReg(); + if (!Transform && DestReg == Inst.getOperand(2).getReg()) { + Transform = true; + Swap = true; + } + if (!Transform || Inst.getOperand(5).getReg() != 0 || (static_cast<ARMOperand &>(*Operands[3]).isToken() && static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) @@ -8177,7 +8234,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, TmpInst.setOpcode(ARM::tADDhirr); TmpInst.addOperand(Inst.getOperand(0)); TmpInst.addOperand(Inst.getOperand(0)); - TmpInst.addOperand(Inst.getOperand(2)); + TmpInst.addOperand(Inst.getOperand(Swap ? 
1 : 2)); TmpInst.addOperand(Inst.getOperand(3)); TmpInst.addOperand(Inst.getOperand(4)); Inst = TmpInst; @@ -9176,8 +9233,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { return false; } - STI.InitMCProcessorInfo(CPU, ""); - STI.InitCPUSchedModel(CPU); + STI.setDefaultFeatures(CPU); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); return false; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 4d12bfb5d60f..d17fdb95dbdf 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -1362,7 +1362,7 @@ MCTargetStreamer *createARMNullTargetStreamer(MCStreamer &S) { MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); - if (TT.getObjectFormat() == Triple::ELF) + if (TT.isOSBinFormatELF()) return new ARMTargetELFStreamer(S); return new ARMTargetStreamer(S); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index fafe25ae5be5..21c9fc1e58b2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -31,7 +31,7 @@ using namespace llvm; #define GET_REGINFO_MC_DESC #include "ARMGenRegisterInfo.inc" -static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] && (MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) && @@ -63,7 +63,7 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, return false; } -static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getITDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { if (STI.getFeatureBits()[llvm::ARM::HasV8Ops] && MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) { @@ -75,7 +75,7 @@ static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, return false; } -static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] && "cannot predicate thumb instructions"); @@ -92,7 +92,7 @@ static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, return false; } -static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, +static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, std::string &Info) { assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] && "cannot predicate thumb instructions"); @@ -257,9 +257,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(const Triple &TT, ArchFS = FS; } - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitARMMCSubtargetInfo(X, TT, CPU, ArchFS); - return X; + return createARMMCSubtargetInfoImpl(TT, CPU, ArchFS); } static MCInstrInfo *createARMMCInstrInfo() { @@ -268,7 +266,7 @@ static MCInstrInfo *createARMMCInstrInfo() { return X; } -static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) { +static MCRegisterInfo *createARMMCRegisterInfo(const Triple &Triple) { MCRegisterInfo *X = new MCRegisterInfo(); InitARMMCRegisterInfo(X, ARM::LR, 0, 0, ARM::PC); return X; @@ -279,10 +277,10 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, MCAsmInfo *MAI; if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO()) MAI = new 
ARMMCAsmInfoDarwin(TheTriple); - else if (TheTriple.isWindowsItaniumEnvironment()) - MAI = new ARMCOFFMCAsmInfoGNU(); else if (TheTriple.isWindowsMSVCEnvironment()) MAI = new ARMCOFFMCAsmInfoMicrosoft(); + else if (TheTriple.isOSWindows()) + MAI = new ARMCOFFMCAsmInfoGNU(); else MAI = new ARMELFMCAsmInfo(TheTriple); @@ -292,14 +290,13 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createARMMCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { - Triple TheTriple(TT); // Default relocation model on Darwin is PIC, not DynamicNoPIC. - RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; + RM = TT.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC; } X->initMCCodeGenInfo(RM, CM, OL); return X; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 77cd890e4cad..3b4358b5d9bf 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -365,7 +365,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, // frame pointer stack slot, the target is ELF and the function has FP, or // the target uses var sized objects. if (NumBytes) { - assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + assert(!MFI->getPristineRegs(MF).test(ARM::R4) && "No scratch register to restore SP from FP!"); emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, TII, *RegInfo); diff --git a/lib/Target/BPF/BPFFrameLowering.cpp b/lib/Target/BPF/BPFFrameLowering.cpp index 54c5ececc7de..c2806c85f24f 100644 --- a/lib/Target/BPF/BPFFrameLowering.cpp +++ b/lib/Target/BPF/BPFFrameLowering.cpp @@ -29,12 +29,12 @@ void BPFFrameLowering::emitPrologue(MachineFunction &MF, void BPFFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {} -void BPFFrameLowering::processFunctionBeforeCalleeSavedScan( - MachineFunction &MF, RegScavenger *RS) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); - - MRI.setPhysRegUnused(BPF::R6); - MRI.setPhysRegUnused(BPF::R7); - MRI.setPhysRegUnused(BPF::R8); - MRI.setPhysRegUnused(BPF::R9); +void BPFFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + SavedRegs.reset(BPF::R6); + SavedRegs.reset(BPF::R7); + SavedRegs.reset(BPF::R8); + SavedRegs.reset(BPF::R9); } diff --git a/lib/Target/BPF/BPFFrameLowering.h b/lib/Target/BPF/BPFFrameLowering.h index 3b9fc443e053..251cda965ff5 100644 --- a/lib/Target/BPF/BPFFrameLowering.h +++ b/lib/Target/BPF/BPFFrameLowering.h @@ -28,8 +28,8 @@ public: void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index 38c56bbef81e..58498a1aec7d 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -302,8 +302,9 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 
DAG.getContext()->diagnose(Err); } + auto PtrVT = getPointerTy(MF.getDataLayout()); Chain = DAG.getCALLSEQ_START( - Chain, DAG.getConstant(NumBytes, CLI.DL, getPointerTy(), true), CLI.DL); + Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true), CLI.DL); SmallVector<std::pair<unsigned, SDValue>, 5> RegsToPass; @@ -350,10 +351,10 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, getPointerTy(), + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT, G->getOffset(), 0); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy(), 0); + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -374,8 +375,8 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END( - Chain, DAG.getConstant(NumBytes, CLI.DL, getPointerTy(), true), - DAG.getConstant(0, CLI.DL, getPointerTy(), true), InFlag, CLI.DL); + Chain, DAG.getConstant(NumBytes, CLI.DL, PtrVT, true), + DAG.getConstant(0, CLI.DL, PtrVT, true), InFlag, CLI.DL); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we diff --git a/lib/Target/BPF/BPFSubtarget.cpp b/lib/Target/BPF/BPFSubtarget.cpp index 65acd585116d..c3a8b1caa63d 100644 --- a/lib/Target/BPF/BPFSubtarget.cpp +++ b/lib/Target/BPF/BPFSubtarget.cpp @@ -28,4 +28,4 @@ void BPFSubtarget::anchor() {} BPFSubtarget::BPFSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) : BPFGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this), - TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {} + TLInfo(TM, *this) {} diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index 3e928fc93a37..840570ebc392 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -40,7 +40,7 @@ static MCInstrInfo *createBPFMCInstrInfo() { return X; } -static MCRegisterInfo *createBPFMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createBPFMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitBPFMCRegisterInfo(X, BPF::R11 /* RAReg doesn't exist */); return X; @@ -48,12 +48,10 @@ static MCRegisterInfo *createBPFMCRegisterInfo(StringRef TT) { static MCSubtargetInfo *createBPFMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitBPFMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createBPFMCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createBPFMCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index bc5d7f65b2f6..272688edb8a1 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -2148,8 +2148,8 @@ char CppWriter::ID = 0; bool 
CPPTargetMachine::addPassesToEmitFile( PassManagerBase &PM, raw_pwrite_stream &o, CodeGenFileType FileType, - bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter, - MachineFunctionInitializer *MFInitializer) { + bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) { if (FileType != TargetMachine::CGFT_AssemblyFile) return true; auto FOut = llvm::make_unique<formatted_raw_ostream>(o); diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index ebf0635b12e4..00e402feffbc 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -31,7 +31,8 @@ struct CPPTargetMachine : public TargetMachine { public: bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, bool DisableVerify, - AnalysisID StartAfter, AnalysisID StopAfter, + AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) override; }; diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp new file mode 100644 index 000000000000..cb7e633fb82f --- /dev/null +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -0,0 +1,1127 @@ +//===--- BitTracker.cpp ---------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// SSA-based bit propagation. +// +// The purpose of this code is, for a given virtual register, to provide +// information about the value of each bit in the register. The values +// of bits are represented by the class BitValue, and take one of four +// cases: 0, 1, "ref" and "bottom". The 0 and 1 are rather clear, the +// "ref" value means that the bit is a copy of another bit (which itself +// cannot be a copy of yet another bit---such chains are not allowed). +// A "ref" value is associated with a BitRef structure, which indicates +// which virtual register, and which bit in that register is the origin +// of the value. For example, given an instruction +// vreg2 = ASL vreg1, 1 +// assuming that nothing is known about bits of vreg1, bit 1 of vreg2 +// will be a "ref" to (vreg1, 0). If there is a subsequent instruction +// vreg3 = ASL vreg2, 2 +// then bit 3 of vreg3 will be a "ref" to (vreg1, 0) as well. +// The "bottom" case means that the bit's value cannot be determined, +// and that this virtual register actually defines it. The "bottom" case +// is discussed in detail in BitTracker.h. In fact, "bottom" is a "ref +// to self", so for the vreg1 above, the bit 0 of it will be a "ref" to +// (vreg1, 0), bit 1 will be a "ref" to (vreg1, 1), etc. +// +// The tracker implements the Wegman-Zadeck algorithm, originally developed +// for SSA-based constant propagation. Each register is represented as +// a sequence of bits, with the convention that bit 0 is the least signi- +// ficant bit. Each bit is propagated individually. The class RegisterCell +// implements the register's representation, and is also the subject of +// the lattice operations in the tracker. +// +// The intended usage of the bit tracker is to create a target-specific +// machine instruction evaluator, pass the evaluator to the BitTracker +// object, and run the tracker. 
The tracker will then collect the bit +// value information for a given machine function. After that, it can be +// queried for the cells for each virtual register. +// Sample code: +// const TargetSpecificEvaluator TSE(TRI, MRI); +// BitTracker BT(TSE, MF); +// BT.run(); +// ... +// unsigned Reg = interestingRegister(); +// RegisterCell RC = BT.get(Reg); +// if (RC[3].is(1)) +// Reg0bit3 = 1; +// +// The code below is intended to be fully target-independent. + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include "BitTracker.h" + +using namespace llvm; + +typedef BitTracker BT; + +namespace { + // Local trickery to pretty print a register (without the whole "%vreg" + // business). + struct printv { + printv(unsigned r) : R(r) {} + unsigned R; + }; + raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { + if (PV.R) + OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); + else + OS << 's'; + return OS; + } +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::BitValue &BV) { + switch (BV.Type) { + case BT::BitValue::Top: + OS << 'T'; + break; + case BT::BitValue::Zero: + OS << '0'; + break; + case BT::BitValue::One: + OS << '1'; + break; + case BT::BitValue::Ref: + OS << printv(BV.RefI.Reg) << '[' << BV.RefI.Pos << ']'; + break; + } + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const BT::RegisterCell &RC) { + unsigned n = RC.Bits.size(); + OS << "{ w:" << n; + // Instead of printing each bit value individually, try to group them + // into logical segments, such as sequences of 0 or 1 bits or references + // to consecutive bits (e.g. "bits 3-5 are same as bits 7-9 of reg xyz"). + // "Start" will be the index of the beginning of the most recent segment. + unsigned Start = 0; + bool SeqRef = false; // A sequence of refs to consecutive bits. + bool ConstRef = false; // A sequence of refs to the same bit. + + for (unsigned i = 1, n = RC.Bits.size(); i < n; ++i) { + const BT::BitValue &V = RC[i]; + const BT::BitValue &SV = RC[Start]; + bool IsRef = (V.Type == BT::BitValue::Ref); + // If the current value is the same as Start, skip to the next one. + if (!IsRef && V == SV) + continue; + if (IsRef && SV.Type == BT::BitValue::Ref && V.RefI.Reg == SV.RefI.Reg) { + if (Start+1 == i) { + SeqRef = (V.RefI.Pos == SV.RefI.Pos+1); + ConstRef = (V.RefI.Pos == SV.RefI.Pos); + } + if (SeqRef && V.RefI.Pos == SV.RefI.Pos+(i-Start)) + continue; + if (ConstRef && V.RefI.Pos == SV.RefI.Pos) + continue; + } + + // The current value is different. Print the previous one and reset + // the Start. 
+ OS << " [" << Start; + unsigned Count = i - Start; + if (Count == 1) { + OS << "]:" << SV; + } else { + OS << '-' << i-1 << "]:"; + if (SV.Type == BT::BitValue::Ref && SeqRef) + OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-' + << SV.RefI.Pos+(Count-1) << ']'; + else + OS << SV; + } + Start = i; + SeqRef = ConstRef = false; + } + + OS << " [" << Start; + unsigned Count = n - Start; + if (n-Start == 1) { + OS << "]:" << RC[Start]; + } else { + OS << '-' << n-1 << "]:"; + const BT::BitValue &SV = RC[Start]; + if (SV.Type == BT::BitValue::Ref && SeqRef) + OS << printv(SV.RefI.Reg) << '[' << SV.RefI.Pos << '-' + << SV.RefI.Pos+(Count-1) << ']'; + else + OS << SV; + } + OS << " }"; + + return OS; +} + +BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) + : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {} + +BitTracker::~BitTracker() { + delete ⤅ +} + + +// If we were allowed to update a cell for a part of a register, the meet +// operation would need to be parametrized by the register number and the +// exact part of the register, so that the computer BitRefs correspond to +// the actual bits of the "self" register. +// While this cannot happen in the current implementation, I'm not sure +// if this should be ruled out in the future. +bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) { + // An example when "meet" can be invoked with SelfR == 0 is a phi node + // with a physical register as an operand. + assert(SelfR == 0 || TargetRegisterInfo::isVirtualRegister(SelfR)); + bool Changed = false; + for (uint16_t i = 0, n = Bits.size(); i < n; ++i) { + const BitValue &RCV = RC[i]; + Changed |= Bits[i].meet(RCV, BitRef(SelfR, i)); + } + return Changed; +} + + +// Insert the entire cell RC into the current cell at position given by M. +BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, + const BitMask &M) { + uint16_t B = M.first(), E = M.last(), W = width(); + // Sanity: M must be a valid mask for *this. + assert(B < W && E < W); + // Sanity: the masked part of *this must have the same number of bits + // as the source. + assert(B > E || E-B+1 == RC.width()); // B <= E => E-B+1 = |RC|. + assert(B <= E || E+(W-B)+1 == RC.width()); // E < B => E+(W-B)+1 = |RC|. + if (B <= E) { + for (uint16_t i = 0; i <= E-B; ++i) + Bits[i+B] = RC[i]; + } else { + for (uint16_t i = 0; i < W-B; ++i) + Bits[i+B] = RC[i]; + for (uint16_t i = 0; i <= E; ++i) + Bits[i] = RC[i+(W-B)]; + } + return *this; +} + + +BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { + uint16_t B = M.first(), E = M.last(), W = width(); + assert(B < W && E < W); + if (B <= E) { + RegisterCell RC(E-B+1); + for (uint16_t i = B; i <= E; ++i) + RC.Bits[i-B] = Bits[i]; + return RC; + } + + RegisterCell RC(E+(W-B)+1); + for (uint16_t i = 0; i < W-B; ++i) + RC.Bits[i] = Bits[i+B]; + for (uint16_t i = 0; i <= E; ++i) + RC.Bits[i+(W-B)] = Bits[i]; + return RC; +} + + +BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { + // Rotate left (i.e. towards increasing bit indices). + // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1] + uint16_t W = width(); + Sh = Sh % W; + if (Sh == 0) + return *this; + + RegisterCell Tmp(W-Sh); + // Tmp = [0..W-Sh-1]. + for (uint16_t i = 0; i < W-Sh; ++i) + Tmp[i] = Bits[i]; + // Shift [W-Sh..W-1] to [0..Sh-1]. + for (uint16_t i = 0; i < Sh; ++i) + Bits[i] = Bits[W-Sh+i]; + // Copy Tmp to [Sh..W-1]. 
+ for (uint16_t i = 0; i < W-Sh; ++i) + Bits[i+Sh] = Tmp.Bits[i]; + return *this; +} + + +BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, + const BitValue &V) { + assert(B <= E); + while (B < E) + Bits[B++] = V; + return *this; +} + + +BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { + // Append the cell given as the argument to the "this" cell. + // Bit 0 of RC becomes bit W of the result, where W is this->width(). + uint16_t W = width(), WRC = RC.width(); + Bits.resize(W+WRC); + for (uint16_t i = 0; i < WRC; ++i) + Bits[i+W] = RC.Bits[i]; + return *this; +} + + +uint16_t BT::RegisterCell::ct(bool B) const { + uint16_t W = width(); + uint16_t C = 0; + BitValue V = B; + while (C < W && Bits[C] == V) + C++; + return C; +} + + +uint16_t BT::RegisterCell::cl(bool B) const { + uint16_t W = width(); + uint16_t C = 0; + BitValue V = B; + while (C < W && Bits[W-(C+1)] == V) + C++; + return C; +} + + +bool BT::RegisterCell::operator== (const RegisterCell &RC) const { + uint16_t W = Bits.size(); + if (RC.Bits.size() != W) + return false; + for (uint16_t i = 0; i < W; ++i) + if (Bits[i] != RC[i]) + return false; + return true; +} + + +uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { + // The general problem is with finding a register class that corresponds + // to a given reference reg:sub. There can be several such classes, and + // since we only care about the register size, it does not matter which + // such class we would find. + // The easiest way to accomplish what we want is to + // 1. find a physical register PhysR from the same class as RR.Reg, + // 2. find a physical register PhysS that corresponds to PhysR:RR.Sub, + // 3. find a register class that contains PhysS. + unsigned PhysR; + if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) { + const TargetRegisterClass *VC = MRI.getRegClass(RR.Reg); + assert(VC->begin() != VC->end() && "Empty register class"); + PhysR = *VC->begin(); + } else { + assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg)); + PhysR = RR.Reg; + } + + unsigned PhysS = (RR.Sub == 0) ? PhysR : TRI.getSubReg(PhysR, RR.Sub); + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PhysS); + uint16_t BW = RC->getSize()*8; + return BW; +} + + +BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, + const CellMapType &M) const { + uint16_t BW = getRegBitWidth(RR); + + // Physical registers are assumed to be present in the map with an unknown + // value. Don't actually insert anything in the map, just return the cell. + if (TargetRegisterInfo::isPhysicalRegister(RR.Reg)) + return RegisterCell::self(0, BW); + + assert(TargetRegisterInfo::isVirtualRegister(RR.Reg)); + // For virtual registers that belong to a class that is not tracked, + // generate an "unknown" value as well. + const TargetRegisterClass *C = MRI.getRegClass(RR.Reg); + if (!track(C)) + return RegisterCell::self(0, BW); + + CellMapType::const_iterator F = M.find(RR.Reg); + if (F != M.end()) { + if (!RR.Sub) + return F->second; + BitMask M = mask(RR.Reg, RR.Sub); + return F->second.extract(M); + } + // If not found, create a "top" entry, but do not insert it in the map. + return RegisterCell::top(BW); +} + + +void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, + CellMapType &M) const { + // While updating the cell map can be done in a meaningful way for + // a part of a register, it makes little sense to implement it as the + // SSA representation would never contain such "partial definitions". 
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return; + assert(RR.Sub == 0 && "Unexpected sub-register in definition"); + // Eliminate all ref-to-reg-0 bit values: replace them with "self". + for (unsigned i = 0, n = RC.width(); i < n; ++i) { + const BitValue &V = RC[i]; + if (V.Type == BitValue::Ref && V.RefI.Reg == 0) + RC[i].RefI = BitRef(RR.Reg, i); + } + M[RR.Reg] = RC; +} + + +// Check if the cell represents a compile-time integer value. +bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { + uint16_t W = A.width(); + for (uint16_t i = 0; i < W; ++i) + if (!A[i].is(0) && !A[i].is(1)) + return false; + return true; +} + + +// Convert a cell to the integer value. The result must fit in uint64_t. +uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { + assert(isInt(A)); + uint64_t Val = 0; + uint16_t W = A.width(); + for (uint16_t i = 0; i < W; ++i) { + Val <<= 1; + Val |= A[i].is(1); + } + return Val; +} + + +// Evaluator helper functions. These implement some common operation on +// register cells that can be used to implement target-specific instructions +// in a target-specific evaluator. + +BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const { + RegisterCell Res(W); + // For bits beyond the 63rd, this will generate the sign bit of V. + for (uint16_t i = 0; i < W; ++i) { + Res[i] = BitValue(V & 1); + V >>= 1; + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { + APInt A = CI->getValue(); + uint16_t BW = A.getBitWidth(); + assert((unsigned)BW == A.getBitWidth() && "BitWidth overflow"); + RegisterCell Res(BW); + for (uint16_t i = 0; i < BW; ++i) + Res[i] = A[i]; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + bool Carry = false; + uint16_t I; + for (I = 0; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (!V1.num() || !V2.num()) + break; + unsigned S = bool(V1) + bool(V2) + Carry; + Res[I] = BitValue(S & 1); + Carry = (S > 1); + } + for (; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + // If the next bit is same as Carry, the result will be 0 plus the + // other bit. The Carry bit will remain unchanged. 
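+ // For instance, with Carry == 0, adding a known-zero bit to an unknown
+ // bit X yields a ref to X and the carry stays 0; once neither input bit
+ // matches Carry, the loop breaks and the remaining result bits are set
+ // to "self" (unknown) below.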
+ if (V1.is(Carry)) + Res[I] = BitValue::ref(V2); + else if (V2.is(Carry)) + Res[I] = BitValue::ref(V1); + else + break; + } + for (; I < W; ++I) + Res[I] = BitValue::self(); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + bool Borrow = false; + uint16_t I; + for (I = 0; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (!V1.num() || !V2.num()) + break; + unsigned S = bool(V1) - bool(V2) - Borrow; + Res[I] = BitValue(S & 1); + Borrow = (S > 1); + } + for (; I < W; ++I) { + const BitValue &V1 = A1[I]; + const BitValue &V2 = A2[I]; + if (V1.is(Borrow)) { + Res[I] = BitValue::ref(V2); + break; + } + if (V2.is(Borrow)) + Res[I] = BitValue::ref(V1); + else + break; + } + for (; I < W; ++I) + Res[I] = BitValue::self(); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width() + A2.width(); + uint16_t Z = A1.ct(0) + A2.ct(0); + RegisterCell Res(W); + Res.fill(0, Z, BitValue::Zero); + Res.fill(Z, W, BitValue::self()); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width() + A2.width(); + uint16_t Z = A1.ct(0) + A2.ct(0); + RegisterCell Res(W); + Res.fill(0, Z, BitValue::Zero); + Res.fill(Z, W, BitValue::self()); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, + uint16_t Sh) const { + assert(Sh <= A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res.rol(Sh); + Res.fill(0, Sh, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, + uint16_t Sh) const { + uint16_t W = A1.width(); + assert(Sh <= W); + RegisterCell Res = RegisterCell::ref(A1); + Res.rol(W-Sh); + Res.fill(W-Sh, W, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, + uint16_t Sh) const { + uint16_t W = A1.width(); + assert(Sh <= W); + RegisterCell Res = RegisterCell::ref(A1); + BitValue Sign = Res[W-1]; + Res.rol(W-Sh); + Res.fill(W-Sh, W, Sign); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(1)) + Res[i] = BitValue::ref(V2); + else if (V2.is(1)) + Res[i] = BitValue::ref(V1); + else if (V1.is(0) || V2.is(0)) + Res[i] = BitValue::Zero; + else if (V1 == V2) + Res[i] = V1; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(1) || V2.is(1)) + Res[i] = BitValue::One; + else if (V1.is(0)) + Res[i] = BitValue::ref(V2); + else if (V2.is(0)) + Res[i] = BitValue::ref(V1); + else if (V1 == V2) + Res[i] = V1; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, + const RegisterCell &A2) const { + uint16_t W = A1.width(); + assert(W == A2.width()); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; 
++i) { + const BitValue &V1 = A1[i]; + const BitValue &V2 = A2[i]; + if (V1.is(0)) + Res[i] = BitValue::ref(V2); + else if (V2.is(0)) + Res[i] = BitValue::ref(V1); + else if (V1 == V2) + Res[i] = BitValue::Zero; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { + uint16_t W = A1.width(); + RegisterCell Res(W); + for (uint16_t i = 0; i < W; ++i) { + const BitValue &V = A1[i]; + if (V.is(0)) + Res[i] = BitValue::One; + else if (V.is(1)) + Res[i] = BitValue::Zero; + else + Res[i] = BitValue::self(); + } + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, + uint16_t BitN) const { + assert(BitN < A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res[BitN] = BitValue::One; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, + uint16_t BitN) const { + assert(BitN < A1.width()); + RegisterCell Res = RegisterCell::ref(A1); + Res[BitN] = BitValue::Zero; + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, + uint16_t W) const { + uint16_t C = A1.cl(B), AW = A1.width(); + // If the last leading non-B bit is not a constant, then we don't know + // the real count. + if ((C < AW && A1[AW-1-C].num()) || C == AW) + return eIMM(C, W); + return RegisterCell::self(0, W); +} + + +BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, + uint16_t W) const { + uint16_t C = A1.ct(B), AW = A1.width(); + // If the last trailing non-B bit is not a constant, then we don't know + // the real count. + if ((C < AW && A1[C].num()) || C == AW) + return eIMM(C, W); + return RegisterCell::self(0, W); +} + + +BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, + uint16_t FromN) const { + uint16_t W = A1.width(); + assert(FromN <= W); + RegisterCell Res = RegisterCell::ref(A1); + BitValue Sign = Res[FromN-1]; + // Sign-extend "inreg". + Res.fill(FromN, W, Sign); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, + uint16_t FromN) const { + uint16_t W = A1.width(); + assert(FromN <= W); + RegisterCell Res = RegisterCell::ref(A1); + Res.fill(FromN, W, BitValue::Zero); + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, + uint16_t B, uint16_t E) const { + uint16_t W = A1.width(); + assert(B < W && E <= W); + if (B == E) + return RegisterCell(0); + uint16_t Last = (E > 0) ? E-1 : W-1; + RegisterCell Res = RegisterCell::ref(A1).extract(BT::BitMask(B, Last)); + // Return shorter cell. + return Res; +} + + +BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, + const RegisterCell &A2, uint16_t AtN) const { + uint16_t W1 = A1.width(), W2 = A2.width(); + (void)W1; + assert(AtN < W1 && AtN+W2 <= W1); + // Copy bits from A1, insert A2 at position AtN. 
+ RegisterCell Res = RegisterCell::ref(A1); + if (W2 > 0) + Res.insert(RegisterCell::ref(A2), BT::BitMask(AtN, AtN+W2-1)); + return Res; +} + + +BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { + assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0"); + uint16_t W = getRegBitWidth(Reg); + assert(W > 0 && "Cannot generate mask for empty register"); + return BitMask(0, W-1); +} + + +bool BT::MachineEvaluator::evaluate(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::REG_SEQUENCE: { + RegisterRef RD = MI->getOperand(0); + assert(RD.Sub == 0); + RegisterRef RS = MI->getOperand(1); + unsigned SS = MI->getOperand(2).getImm(); + RegisterRef RT = MI->getOperand(3); + unsigned ST = MI->getOperand(4).getImm(); + assert(SS != ST); + + uint16_t W = getRegBitWidth(RD); + RegisterCell Res(W); + Res.insert(RegisterCell::ref(getCell(RS, Inputs)), mask(RD.Reg, SS)); + Res.insert(RegisterCell::ref(getCell(RT, Inputs)), mask(RD.Reg, ST)); + putCell(RD, Res, Outputs); + break; + } + + case TargetOpcode::COPY: { + // COPY can transfer a smaller register into a wider one. + // If that is the case, fill the remaining high bits with 0. + RegisterRef RD = MI->getOperand(0); + RegisterRef RS = MI->getOperand(1); + assert(RD.Sub == 0); + uint16_t WD = getRegBitWidth(RD); + uint16_t WS = getRegBitWidth(RS); + assert(WD >= WS); + RegisterCell Src = getCell(RS, Inputs); + RegisterCell Res(WD); + Res.insert(Src, BitMask(0, WS-1)); + Res.fill(WS, WD, BitValue::Zero); + putCell(RD, Res, Outputs); + break; + } + + default: + return false; + } + + return true; +} + + +// Main W-Z implementation. + +void BT::visitPHI(const MachineInstr *PI) { + int ThisN = PI->getParent()->getNumber(); + if (Trace) + dbgs() << "Visit FI(BB#" << ThisN << "): " << *PI; + + const MachineOperand &MD = PI->getOperand(0); + assert(MD.getSubReg() == 0 && "Unexpected sub-register in definition"); + RegisterRef DefRR(MD); + uint16_t DefBW = ME.getRegBitWidth(DefRR); + + RegisterCell DefC = ME.getCell(DefRR, Map); + if (DefC == RegisterCell::self(DefRR.Reg, DefBW)) // XXX slow + return; + + bool Changed = false; + + for (unsigned i = 1, n = PI->getNumOperands(); i < n; i += 2) { + const MachineBasicBlock *PB = PI->getOperand(i+1).getMBB(); + int PredN = PB->getNumber(); + if (Trace) + dbgs() << " edge BB#" << PredN << "->BB#" << ThisN; + if (!EdgeExec.count(CFGEdge(PredN, ThisN))) { + if (Trace) + dbgs() << " not executable\n"; + continue; + } + + RegisterRef RU = PI->getOperand(i); + RegisterCell ResC = ME.getCell(RU, Map); + if (Trace) + dbgs() << " input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub) + << " cell: " << ResC << "\n"; + Changed |= DefC.meet(ResC, DefRR.Reg); + } + + if (Changed) { + if (Trace) + dbgs() << "Output: " << PrintReg(DefRR.Reg, &ME.TRI, DefRR.Sub) + << " cell: " << DefC << "\n"; + ME.putCell(DefRR, DefC, Map); + visitUsesOf(DefRR.Reg); + } +} + + +void BT::visitNonBranch(const MachineInstr *MI) { + if (Trace) { + int ThisN = MI->getParent()->getNumber(); + dbgs() << "Visit MI(BB#" << ThisN << "): " << *MI; + } + if (MI->isDebugValue()) + return; + assert(!MI->isBranch() && "Unexpected branch instruction"); + + CellMapType ResMap; + bool Eval = ME.evaluate(MI, Map, ResMap); + + if (Trace && Eval) { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + RegisterRef RU(MO); + dbgs() << 
" input reg: " << PrintReg(RU.Reg, &ME.TRI, RU.Sub) + << " cell: " << ME.getCell(RU, Map) << "\n"; + } + dbgs() << "Outputs:\n"; + for (CellMapType::iterator I = ResMap.begin(), E = ResMap.end(); + I != E; ++I) { + RegisterRef RD(I->first); + dbgs() << " " << PrintReg(I->first, &ME.TRI) << " cell: " + << ME.getCell(RD, ResMap) << "\n"; + } + } + + // Iterate over all definitions of the instruction, and update the + // cells accordingly. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Visit register defs only. + if (!MO.isReg() || !MO.isDef()) + continue; + RegisterRef RD(MO); + assert(RD.Sub == 0 && "Unexpected sub-register in definition"); + if (!TargetRegisterInfo::isVirtualRegister(RD.Reg)) + continue; + + bool Changed = false; + if (!Eval || !ResMap.has(RD.Reg)) { + // Set to "ref" (aka "bottom"). + uint16_t DefBW = ME.getRegBitWidth(RD); + RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW); + if (RefC != ME.getCell(RD, Map)) { + ME.putCell(RD, RefC, Map); + Changed = true; + } + } else { + RegisterCell DefC = ME.getCell(RD, Map); + RegisterCell ResC = ME.getCell(RD, ResMap); + // This is a non-phi instruction, so the values of the inputs come + // from the same registers each time this instruction is evaluated. + // During the propagation, the values of the inputs can become lowered + // in the sense of the lattice operation, which may cause different + // results to be calculated in subsequent evaluations. This should + // not cause the bottoming of the result in the map, since the new + // result is already reflecting the lowered inputs. + for (uint16_t i = 0, w = DefC.width(); i < w; ++i) { + BitValue &V = DefC[i]; + // Bits that are already "bottom" should not be updated. + if (V.Type == BitValue::Ref && V.RefI.Reg == RD.Reg) + continue; + // Same for those that are identical in DefC and ResC. + if (V == ResC[i]) + continue; + V = ResC[i]; + Changed = true; + } + if (Changed) + ME.putCell(RD, DefC, Map); + } + if (Changed) + visitUsesOf(RD.Reg); + } +} + + +void BT::visitBranchesFrom(const MachineInstr *BI) { + const MachineBasicBlock &B = *BI->getParent(); + MachineBasicBlock::const_iterator It = BI, End = B.end(); + BranchTargetList Targets, BTs; + bool FallsThrough = true, DefaultToAll = false; + int ThisN = B.getNumber(); + + do { + BTs.clear(); + const MachineInstr *MI = &*It; + if (Trace) + dbgs() << "Visit BR(BB#" << ThisN << "): " << *MI; + assert(MI->isBranch() && "Expecting branch instruction"); + InstrExec.insert(MI); + bool Eval = ME.evaluate(MI, Map, BTs, FallsThrough); + if (!Eval) { + // If the evaluation failed, we will add all targets. Keep going in + // the loop to mark all executable branches as such. + DefaultToAll = true; + FallsThrough = true; + if (Trace) + dbgs() << " failed to evaluate: will add all CFG successors\n"; + } else if (!DefaultToAll) { + // If evaluated successfully add the targets to the cumulative list. + if (Trace) { + dbgs() << " adding targets:"; + for (unsigned i = 0, n = BTs.size(); i < n; ++i) + dbgs() << " BB#" << BTs[i]->getNumber(); + if (FallsThrough) + dbgs() << "\n falls through\n"; + else + dbgs() << "\n does not fall through\n"; + } + Targets.insert(BTs.begin(), BTs.end()); + } + ++It; + } while (FallsThrough && It != End); + + typedef MachineBasicBlock::const_succ_iterator succ_iterator; + if (!DefaultToAll) { + // Need to add all CFG successors that lead to EH landing pads. 
+ // There won't be explicit branches to these blocks, but they must + // be processed. + for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) { + const MachineBasicBlock *SB = *I; + if (SB->isLandingPad()) + Targets.insert(SB); + } + if (FallsThrough) { + MachineFunction::const_iterator BIt = &B; + MachineFunction::const_iterator Next = std::next(BIt); + if (Next != MF.end()) + Targets.insert(&*Next); + } + } else { + for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) + Targets.insert(*I); + } + + for (unsigned i = 0, n = Targets.size(); i < n; ++i) { + int TargetN = Targets[i]->getNumber(); + FlowQ.push(CFGEdge(ThisN, TargetN)); + } +} + + +void BT::visitUsesOf(unsigned Reg) { + if (Trace) + dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; + + typedef MachineRegisterInfo::use_nodbg_iterator use_iterator; + use_iterator End = MRI.use_nodbg_end(); + for (use_iterator I = MRI.use_nodbg_begin(Reg); I != End; ++I) { + MachineInstr *UseI = I->getParent(); + if (!InstrExec.count(UseI)) + continue; + if (UseI->isPHI()) + visitPHI(UseI); + else if (!UseI->isBranch()) + visitNonBranch(UseI); + else + visitBranchesFrom(UseI); + } +} + + +BT::RegisterCell BT::get(RegisterRef RR) const { + return ME.getCell(RR, Map); +} + + +void BT::put(RegisterRef RR, const RegisterCell &RC) { + ME.putCell(RR, RC, Map); +} + + +// Replace all references to bits from OldRR with the corresponding bits +// in NewRR. +void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { + assert(Map.has(OldRR.Reg) && "OldRR not present in map"); + BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub); + BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub); + uint16_t OMB = OM.first(), OME = OM.last(); + uint16_t NMB = NM.first(), NME = NM.last(); + (void)NME; + assert((OME-OMB == NME-NMB) && + "Substituting registers of different lengths"); + for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { + RegisterCell &RC = I->second; + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + BitValue &V = RC[i]; + if (V.Type != BitValue::Ref || V.RefI.Reg != OldRR.Reg) + continue; + if (V.RefI.Pos < OMB || V.RefI.Pos > OME) + continue; + V.RefI.Reg = NewRR.Reg; + V.RefI.Pos += NMB-OMB; + } + } +} + + +// Check if the block has been "executed" during propagation. (If not, the +// block is dead, but it may still appear to be reachable.) +bool BT::reached(const MachineBasicBlock *B) const { + int BN = B->getNumber(); + assert(BN >= 0); + for (EdgeSetType::iterator I = EdgeExec.begin(), E = EdgeExec.end(); + I != E; ++I) { + if (I->second == BN) + return true; + } + return false; +} + + +void BT::reset() { + EdgeExec.clear(); + InstrExec.clear(); + Map.clear(); +} + + +void BT::run() { + reset(); + assert(FlowQ.empty()); + + typedef GraphTraits<const MachineFunction*> MachineFlowGraphTraits; + const MachineBasicBlock *Entry = MachineFlowGraphTraits::getEntryNode(&MF); + + unsigned MaxBN = 0; + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + assert(I->getNumber() >= 0 && "Disconnected block"); + unsigned BN = I->getNumber(); + if (BN > MaxBN) + MaxBN = BN; + } + + // Keep track of visited blocks. + BitVector BlockScanned(MaxBN+1); + + int EntryN = Entry->getNumber(); + // Generate a fake edge to get something to start with. 
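+ // (The pseudo-predecessor -1 is not a real block number; it only makes
+ // the entry block appear reachable so that propagation can begin.)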
+ FlowQ.push(CFGEdge(-1, EntryN)); + + while (!FlowQ.empty()) { + CFGEdge Edge = FlowQ.front(); + FlowQ.pop(); + + if (EdgeExec.count(Edge)) + continue; + EdgeExec.insert(Edge); + + const MachineBasicBlock &B = *MF.getBlockNumbered(Edge.second); + MachineBasicBlock::const_iterator It = B.begin(), End = B.end(); + // Visit PHI nodes first. + while (It != End && It->isPHI()) { + const MachineInstr *PI = &*It++; + InstrExec.insert(PI); + visitPHI(PI); + } + + // If this block has already been visited through a flow graph edge, + // then the instructions have already been processed. Any updates to + // the cells would now only happen through visitUsesOf... + if (BlockScanned[Edge.second]) + continue; + BlockScanned[Edge.second] = true; + + // Visit non-branch instructions. + while (It != End && !It->isBranch()) { + const MachineInstr *MI = &*It++; + InstrExec.insert(MI); + visitNonBranch(MI); + } + // If block end has been reached, add the fall-through edge to the queue. + if (It == End) { + MachineFunction::const_iterator BIt = &B; + MachineFunction::const_iterator Next = std::next(BIt); + if (Next != MF.end()) { + int ThisN = B.getNumber(); + int NextN = Next->getNumber(); + FlowQ.push(CFGEdge(ThisN, NextN)); + } + } else { + // Handle the remaining sequence of branches. This function will update + // the work queue. + visitBranchesFrom(It); + } + } // while (!FlowQ->empty()) + + if (Trace) { + dbgs() << "Cells after propagation:\n"; + for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) + dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; + } +} + diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h new file mode 100644 index 000000000000..ed002a794d66 --- /dev/null +++ b/lib/Target/Hexagon/BitTracker.h @@ -0,0 +1,449 @@ +//===--- BitTracker.h -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITTRACKER_H
+#define BITTRACKER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+#include <map>
+#include <queue>
+#include <set>
+
+namespace llvm {
+ class ConstantInt;
+ class MachineRegisterInfo;
+ class MachineBasicBlock;
+ class MachineInstr;
+ class MachineOperand;
+ class raw_ostream;
+
+struct BitTracker {
+ struct BitRef;
+ struct RegisterRef;
+ struct BitValue;
+ struct BitMask;
+ struct RegisterCell;
+ struct MachineEvaluator;
+
+ typedef SetVector<const MachineBasicBlock *> BranchTargetList;
+
+ struct CellMapType : public std::map<unsigned,RegisterCell> {
+ bool has(unsigned Reg) const;
+ };
+
+ BitTracker(const MachineEvaluator &E, MachineFunction &F);
+ ~BitTracker();
+
+ void run();
+ void trace(bool On = false) { Trace = On; }
+ bool has(unsigned Reg) const;
+ const RegisterCell &lookup(unsigned Reg) const;
+ RegisterCell get(RegisterRef RR) const;
+ void put(RegisterRef RR, const RegisterCell &RC);
+ void subst(RegisterRef OldRR, RegisterRef NewRR);
+ bool reached(const MachineBasicBlock *B) const;
+
+private:
+ void visitPHI(const MachineInstr *PI);
+ void visitNonBranch(const MachineInstr *MI);
+ void visitBranchesFrom(const MachineInstr *BI);
+ void visitUsesOf(unsigned Reg);
+ void reset();
+
+ typedef std::pair<int,int> CFGEdge;
+ typedef std::set<CFGEdge> EdgeSetType;
+ typedef std::set<const MachineInstr *> InstrSetType;
+ typedef std::queue<CFGEdge> EdgeQueueType;
+
+ EdgeSetType EdgeExec; // Executable flow graph edges.
+ InstrSetType InstrExec; // Executable instructions.
+ EdgeQueueType FlowQ; // Work queue of CFG edges.
+ bool Trace; // Enable tracing for debugging.
+
+ const MachineEvaluator &ME;
+ MachineFunction &MF;
+ MachineRegisterInfo &MRI;
+ CellMapType &Map;
+};
+
+
+// Abstraction of a reference to bit at position Pos from a register Reg.
+struct BitTracker::BitRef {
+ BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
+ BitRef(const BitRef &BR) : Reg(BR.Reg), Pos(BR.Pos) {}
+ bool operator== (const BitRef &BR) const {
+ // If Reg is 0, disregard Pos.
+ return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
+ }
+ unsigned Reg;
+ uint16_t Pos;
+};
+
+
+// Abstraction of a register reference in MachineOperand. It contains the
+// register number and the subregister index.
+struct BitTracker::RegisterRef {
+ RegisterRef(unsigned R = 0, unsigned S = 0)
+ : Reg(R), Sub(S) {}
+ RegisterRef(const MachineOperand &MO)
+ : Reg(MO.getReg()), Sub(MO.getSubReg()) {}
+ unsigned Reg, Sub;
+};
+
+
+// Value that a single bit can take. This is outside of the context of
+// any register, it is more of an abstraction of the two-element set of
+// possible bit values. One extension here is the "Ref" type, which
+// indicates that this bit takes the same value as the bit described by
+// RefInfo.
+struct BitTracker::BitValue {
+ enum ValueType {
+ Top, // Bit not yet defined.
+ Zero, // Bit = 0.
+ One, // Bit = 1.
+ Ref // Bit value same as the one described in RefI.
+ // Conceptually, there is no explicit "bottom" value: the lattice's
+ // bottom will be expressed as a "ref to itself", which, in the context
+ // of registers, could be read as "this value of this bit is defined by
+ // this bit".
+ // The ordering is:
+ // x <= Top,
+ // Self <= x, where "Self" is "ref to itself".
+ // This makes the value lattice different for each virtual register + // (even for each bit in the same virtual register), since the "bottom" + // for one register will be a simple "ref" for another register. + // Since we do not store the "Self" bit and register number, the meet + // operation will need to take it as a parameter. + // + // In practice there is a special case for values that are not associa- + // ted with any specific virtual register. An example would be a value + // corresponding to a bit of a physical register, or an intermediate + // value obtained in some computation (such as instruction evaluation). + // Such cases are identical to the usual Ref type, but the register + // number is 0. In such case the Pos field of the reference is ignored. + // + // What is worthy of notice is that in value V (that is a "ref"), as long + // as the RefI.Reg is not 0, it may actually be the same register as the + // one in which V will be contained. If the RefI.Pos refers to the posi- + // tion of V, then V is assumed to be "bottom" (as a "ref to itself"), + // otherwise V is taken to be identical to the referenced bit of the + // same register. + // If RefI.Reg is 0, however, such a reference to the same register is + // not possible. Any value V that is a "ref", and whose RefI.Reg is 0 + // is treated as "bottom". + }; + ValueType Type; + BitRef RefI; + + BitValue(ValueType T = Top) : Type(T) {} + BitValue(bool B) : Type(B ? One : Zero) {} + BitValue(const BitValue &V) : Type(V.Type), RefI(V.RefI) {} + BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {} + + bool operator== (const BitValue &V) const { + if (Type != V.Type) + return false; + if (Type == Ref && !(RefI == V.RefI)) + return false; + return true; + } + bool operator!= (const BitValue &V) const { + return !operator==(V); + } + bool is(unsigned T) const { + assert(T == 0 || T == 1); + return T == 0 ? Type == Zero + : (T == 1 ? Type == One : false); + } + + // The "meet" operation is the "." operation in a semilattice (L, ., T, B): + // (1) x.x = x + // (2) x.y = y.x + // (3) x.(y.z) = (x.y).z + // (4) x.T = x (i.e. T = "top") + // (5) x.B = B (i.e. B = "bottom") + // + // This "meet" function will update the value of the "*this" object with + // the newly calculated one, and return "true" if the value of *this has + // changed, and "false" otherwise. + // To prove that it satisfies the conditions (1)-(5), it is sufficient + // to show that a relation + // x <= y <=> x.y = x + // defines a partial order (i.e. that "meet" is same as "infimum"). + bool meet(const BitValue &V, const BitRef &Self) { + // First, check the cases where there is nothing to be done. + if (Type == Ref && RefI == Self) // Bottom.meet(V) = Bottom (i.e. This) + return false; + if (V.Type == Top) // This.meet(Top) = This + return false; + if (*this == V) // This.meet(This) = This + return false; + + // At this point, we know that the value of "this" will change. + // If it is Top, it will become the same as V, otherwise it will + // become "bottom" (i.e. Self). + if (Type == Top) { + Type = V.Type; + RefI = V.RefI; // This may be irrelevant, but copy anyway. + return true; + } + // Become "bottom". + Type = Ref; + RefI = Self; + return true; + } + + // Create a reference to the bit value V. + static BitValue ref(const BitValue &V); + // Create a "self". 
+ static BitValue self(const BitRef &Self = BitRef()); + + bool num() const { + return Type == Zero || Type == One; + } + operator bool() const { + assert(Type == Zero || Type == One); + return Type == One; + } + + friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV); +}; + + +// This operation must be idempotent, i.e. ref(ref(V)) == ref(V). +inline BitTracker::BitValue +BitTracker::BitValue::ref(const BitValue &V) { + if (V.Type != Ref) + return BitValue(V.Type); + if (V.RefI.Reg != 0) + return BitValue(V.RefI.Reg, V.RefI.Pos); + return self(); +} + + +inline BitTracker::BitValue +BitTracker::BitValue::self(const BitRef &Self) { + return BitValue(Self.Reg, Self.Pos); +} + + +// A sequence of bits starting from index B up to and including index E. +// If E < B, the mask represents two sections: [0..E] and [B..W) where +// W is the width of the register. +struct BitTracker::BitMask { + BitMask() : B(0), E(0) {} + BitMask(uint16_t b, uint16_t e) : B(b), E(e) {} + uint16_t first() const { return B; } + uint16_t last() const { return E; } +private: + uint16_t B, E; +}; + + +// Representation of a register: a list of BitValues. +struct BitTracker::RegisterCell { + RegisterCell(uint16_t Width = DefaultBitN) : Bits(Width) {} + + uint16_t width() const { + return Bits.size(); + } + const BitValue &operator[](uint16_t BitN) const { + assert(BitN < Bits.size()); + return Bits[BitN]; + } + BitValue &operator[](uint16_t BitN) { + assert(BitN < Bits.size()); + return Bits[BitN]; + } + + bool meet(const RegisterCell &RC, unsigned SelfR); + RegisterCell &insert(const RegisterCell &RC, const BitMask &M); + RegisterCell extract(const BitMask &M) const; // Returns a new cell. + RegisterCell &rol(uint16_t Sh); // Rotate left. + RegisterCell &fill(uint16_t B, uint16_t E, const BitValue &V); + RegisterCell &cat(const RegisterCell &RC); // Concatenate. + uint16_t cl(bool B) const; + uint16_t ct(bool B) const; + + bool operator== (const RegisterCell &RC) const; + bool operator!= (const RegisterCell &RC) const { + return !operator==(RC); + } + + const RegisterCell &operator=(const RegisterCell &RC) { + Bits = RC.Bits; + return *this; + } + + // Generate a "ref" cell for the corresponding register. In the resulting + // cell each bit will be described as being the same as the corresponding + // bit in register Reg (i.e. the cell is "defined" by register Reg). + static RegisterCell self(unsigned Reg, uint16_t Width); + // Generate a "top" cell of given size. + static RegisterCell top(uint16_t Width); + // Generate a cell that is a "ref" to another cell. + static RegisterCell ref(const RegisterCell &C); + +private: + // The DefaultBitN is here only to avoid frequent reallocation of the + // memory in the vector. 
+ static const unsigned DefaultBitN = 32; + typedef SmallVector<BitValue, DefaultBitN> BitValueList; + BitValueList Bits; + + friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC); +}; + + +inline bool BitTracker::has(unsigned Reg) const { + return Map.find(Reg) != Map.end(); +} + + +inline const BitTracker::RegisterCell& +BitTracker::lookup(unsigned Reg) const { + CellMapType::const_iterator F = Map.find(Reg); + assert(F != Map.end()); + return F->second; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { + RegisterCell RC(Width); + for (uint16_t i = 0; i < Width; ++i) + RC.Bits[i] = BitValue::self(BitRef(Reg, i)); + return RC; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::top(uint16_t Width) { + RegisterCell RC(Width); + for (uint16_t i = 0; i < Width; ++i) + RC.Bits[i] = BitValue(BitValue::Top); + return RC; +} + + +inline BitTracker::RegisterCell +BitTracker::RegisterCell::ref(const RegisterCell &C) { + uint16_t W = C.width(); + RegisterCell RC(W); + for (unsigned i = 0; i < W; ++i) + RC[i] = BitValue::ref(C[i]); + return RC; +} + + +inline bool BitTracker::CellMapType::has(unsigned Reg) const { + return find(Reg) != end(); +} + +// A class to evaluate target's instructions and update the cell maps. +// This is used internally by the bit tracker. A target that wants to +// utilize this should implement the evaluation functions (noted below) +// in a subclass of this class. +struct BitTracker::MachineEvaluator { + MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M) + : TRI(T), MRI(M) {} + virtual ~MachineEvaluator() {} + + uint16_t getRegBitWidth(const RegisterRef &RR) const; + + RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const; + void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const; + // A result of any operation should use refs to the source cells, not + // the cells directly. This function is a convenience wrapper to quickly + // generate a ref for a cell corresponding to a register reference. + RegisterCell getRef(const RegisterRef &RR, const CellMapType &M) const { + RegisterCell RC = getCell(RR, M); + return RegisterCell::ref(RC); + } + + // Helper functions. + // Check if a cell is an immediate value (i.e. all bits are either 0 or 1). + bool isInt(const RegisterCell &A) const; + // Convert cell to an immediate value. + uint64_t toInt(const RegisterCell &A) const; + + // Generate cell from an immediate value. + RegisterCell eIMM(int64_t V, uint16_t W) const; + RegisterCell eIMM(const ConstantInt *CI) const; + + // Arithmetic. + RegisterCell eADD(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eSUB(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eMLS(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eMLU(const RegisterCell &A1, const RegisterCell &A2) const; + + // Shifts. + RegisterCell eASL(const RegisterCell &A1, uint16_t Sh) const; + RegisterCell eLSR(const RegisterCell &A1, uint16_t Sh) const; + RegisterCell eASR(const RegisterCell &A1, uint16_t Sh) const; + + // Logical. + RegisterCell eAND(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eORL(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eXOR(const RegisterCell &A1, const RegisterCell &A2) const; + RegisterCell eNOT(const RegisterCell &A1) const; + + // Set bit, clear bit. 
+ RegisterCell eSET(const RegisterCell &A1, uint16_t BitN) const; + RegisterCell eCLR(const RegisterCell &A1, uint16_t BitN) const; + + // Count leading/trailing bits (zeros/ones). + RegisterCell eCLB(const RegisterCell &A1, bool B, uint16_t W) const; + RegisterCell eCTB(const RegisterCell &A1, bool B, uint16_t W) const; + + // Sign/zero extension. + RegisterCell eSXT(const RegisterCell &A1, uint16_t FromN) const; + RegisterCell eZXT(const RegisterCell &A1, uint16_t FromN) const; + + // Extract/insert + // XTR R,b,e: extract bits from A1 starting at bit b, ending at e-1. + // INS R,S,b: take R and replace bits starting from b with S. + RegisterCell eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const; + RegisterCell eINS(const RegisterCell &A1, const RegisterCell &A2, + uint16_t AtN) const; + + // User-provided functions for individual targets: + + // Return a sub-register mask that indicates which bits in Reg belong + // to the subregister Sub. These bits are assumed to be contiguous in + // the super-register, and have the same ordering in the sub-register + // as in the super-register. It is valid to call this function with + // Sub == 0, in this case, the function should return a mask that spans + // the entire register Reg (which is what the default implementation + // does). + virtual BitMask mask(unsigned Reg, unsigned Sub) const; + // Indicate whether a given register class should be tracked. + virtual bool track(const TargetRegisterClass *RC) const { return true; } + // Evaluate a non-branching machine instruction, given the cell map with + // the input values. Place the results in the Outputs map. Return "true" + // if evaluation succeeded, "false" otherwise. + virtual bool evaluate(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + // Evaluate a branch, given the cell map with the input values. Fill out + // a list of all possible branch targets and indicate (through a flag) + // whether the branch could fall-through. Return "true" if this information + // has been successfully computed, "false" otherwise. + virtual bool evaluate(const MachineInstr *BI, const CellMapType &Inputs, + BranchTargetList &Targets, bool &FallsThru) const = 0; + + const TargetRegisterInfo &TRI; + MachineRegisterInfo &MRI; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 758ccc741007..7ab2f0ba01df 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -12,13 +12,19 @@ tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(HexagonCommonTableGen) add_llvm_target(HexagonCodeGen + BitTracker.cpp HexagonAsmPrinter.cpp + HexagonBitTracker.cpp HexagonCFGOptimizer.cpp + HexagonCommonGEP.cpp HexagonCopyToCombine.cpp HexagonExpandCondsets.cpp HexagonExpandPredSpillCode.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp + HexagonGenExtract.cpp + HexagonGenInsert.cpp + HexagonGenPredicate.cpp HexagonHardwareLoops.cpp HexagonInstrInfo.cpp HexagonISelDAGToDAG.cpp diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp new file mode 100644 index 000000000000..021e58a1d08a --- /dev/null +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -0,0 +1,1174 @@ +//===--- HexagonBitTracker.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +using namespace llvm; + +typedef BitTracker BT; + +HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, + MachineRegisterInfo &mri, + const HexagonInstrInfo &tii, + MachineFunction &mf) + : MachineEvaluator(tri, mri), MF(mf), MFI(*mf.getFrameInfo()), TII(tii) { + // Populate the VRX map (VR to extension-type). + // Go over all the formal parameters of the function. If a given parameter + // P is sign- or zero-extended, locate the virtual register holding that + // parameter and create an entry in the VRX map indicating the type of ex- + // tension (and the source type). + // This is a bit complicated to do accurately, since the memory layout in- + // formation is necessary to precisely determine whether an aggregate para- + // meter will be passed in a register or in memory. What is given in MRI + // is the association between the physical register that is live-in (i.e. + // holds an argument), and the virtual register that this value will be + // copied into. This, by itself, is not sufficient to map back the virtual + // register to a formal parameter from Function (since consecutive live-ins + // from MRI may not correspond to consecutive formal parameters from Func- + // tion). To avoid the complications with in-memory arguments, only consi- + // der the initial sequence of formal parameters that are known to be + // passed via registers. + unsigned AttrIdx = 0; + unsigned InVirtReg, InPhysReg = 0; + const Function &F = *MF.getFunction(); + typedef Function::const_arg_iterator arg_iterator; + for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { + AttrIdx++; + const Argument &Arg = *I; + Type *ATy = Arg.getType(); + unsigned Width = 0; + if (ATy->isIntegerTy()) + Width = ATy->getIntegerBitWidth(); + else if (ATy->isPointerTy()) + Width = 32; + // If pointer size is not set through target data, it will default to + // Module::AnyPointerSize. + if (Width == 0 || Width > 64) + break; + InPhysReg = getNextPhysReg(InPhysReg, Width); + if (!InPhysReg) + break; + InVirtReg = getVirtRegFor(InPhysReg); + if (!InVirtReg) + continue; + AttributeSet Attrs = F.getAttributes(); + if (Attrs.hasAttribute(AttrIdx, Attribute::SExt)) + VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width))); + else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt)) + VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::ZExt, Width))); + } +} + + +BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { + if (Sub == 0) + return MachineEvaluator::mask(Reg, 0); + using namespace Hexagon; + const TargetRegisterClass *RC = MRI.getRegClass(Reg); + unsigned ID = RC->getID(); + uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub)); + switch (ID) { + case DoubleRegsRegClassID: + return (Sub == subreg_loreg) ? 
BT::BitMask(0, RW-1) + : BT::BitMask(RW, 2*RW-1); + default: + break; + } +#ifndef NDEBUG + dbgs() << PrintReg(Reg, &TRI, Sub) << '\n'; +#endif + llvm_unreachable("Unexpected register/subregister"); +} + + +namespace { + struct RegisterRefs : public std::vector<BT::RegisterRef> { + typedef std::vector<BT::RegisterRef> Base; + RegisterRefs(const MachineInstr *MI); + const BT::RegisterRef &operator[](unsigned n) const { + // The main purpose of this operator is to assert with bad argument. + assert(n < size()); + return Base::operator[](n); + } + }; + + RegisterRefs::RegisterRefs(const MachineInstr *MI) + : Base(MI->getNumOperands()) { + for (unsigned i = 0, n = size(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) + at(i) = BT::RegisterRef(MO); + // For indices that don't correspond to registers, the entry will + // remain constructed via the default constructor. + } + } +} + + +bool HexagonEvaluator::evaluate(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + unsigned NumDefs = 0; + + // Sanity verification: there should not be any defs with subregisters. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + NumDefs++; + assert(MO.getSubReg() == 0); + } + + if (NumDefs == 0) + return false; + + if (MI->mayLoad()) + return evaluateLoad(MI, Inputs, Outputs); + + // Check COPY instructions that copy formal parameters into virtual + // registers. Such parameters can be sign- or zero-extended at the + // call site, and we should take advantage of this knowledge. The MRI + // keeps a list of pairs of live-in physical and virtual registers, + // which provides information about which virtual registers will hold + // the argument values. The function will still contain instructions + // defining those virtual registers, and in practice those are COPY + // instructions from a physical to a virtual register. In such cases, + // applying the argument extension to the virtual register can be seen + // as simply mirroring the extension that had already been applied to + // the physical register at the call site. If the defining instruction + // was not a COPY, it would not be clear how to mirror that extension + // on the callee's side. For that reason, only check COPY instructions + // for potential extensions. + if (MI->isCopy()) { + if (evaluateFormalCopy(MI, Inputs, Outputs)) + return true; + } + + // Beyond this point, if any operand is a global, skip that instruction. + // The reason is that certain instructions that can take an immediate + // operand can also have a global symbol in that operand. To avoid + // checking what kind of operand a given instruction has individually + // for each instruction, do it here. Global symbols as operands gene- + // rally do not provide any useful information. + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isGlobal() || MO.isBlockAddress() || MO.isSymbol() || MO.isJTI() || + MO.isCPI()) + return false; + } + + RegisterRefs Reg(MI); + unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + #define op(i) MI->getOperand(i) + #define rc(i) RegisterCell::ref(getCell(Reg[i],Inputs)) + #define im(i) MI->getOperand(i).getImm() + + // If the instruction has no register operands, skip it. + if (Reg.size() == 0) + return false; + + // Record result for register in operand 0. 
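+ // (rr0, together with cop/lo/hi/half/shuffle defined next, is a local
+ // helper lambda used by the per-opcode cases further below.)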
+ auto rr0 = [this,Reg] (const BT::RegisterCell &Val, CellMapType &Outputs)
+ -> bool {
+ putCell(Reg[0], Val, Outputs);
+ return true;
+ };
+ // Get the cell corresponding to the N-th operand.
+ auto cop = [this,Reg,MI,Inputs] (unsigned N, uint16_t W)
+ -> BT::RegisterCell {
+ const MachineOperand &Op = MI->getOperand(N);
+ if (Op.isImm())
+ return eIMM(Op.getImm(), W);
+ if (!Op.isReg())
+ return RegisterCell::self(0, W);
+ assert(getRegBitWidth(Reg[N]) == W && "Register width mismatch");
+ return rc(N);
+ };
+ // Extract RW low bits of the cell.
+ auto lo = [this] (const BT::RegisterCell &RC, uint16_t RW)
+ -> BT::RegisterCell {
+ assert(RW <= RC.width());
+ return eXTR(RC, 0, RW);
+ };
+ // Extract RW high bits of the cell.
+ auto hi = [this] (const BT::RegisterCell &RC, uint16_t RW)
+ -> BT::RegisterCell {
+ uint16_t W = RC.width();
+ assert(RW <= W);
+ return eXTR(RC, W-RW, W);
+ };
+ // Extract N-th halfword (counting from the least significant position).
+ auto half = [this] (const BT::RegisterCell &RC, unsigned N)
+ -> BT::RegisterCell {
+ assert(N*16+16 <= RC.width());
+ return eXTR(RC, N*16, N*16+16);
+ };
+ // Shuffle bits (pick even/odd from cells and merge into result).
+ auto shuffle = [this] (const BT::RegisterCell &Rs, const BT::RegisterCell &Rt,
+ uint16_t BW, bool Odd) -> BT::RegisterCell {
+ uint16_t I = Odd, Ws = Rs.width();
+ assert(Ws == Rt.width());
+ RegisterCell RC = eXTR(Rt, I*BW, I*BW+BW).cat(eXTR(Rs, I*BW, I*BW+BW));
+ I += 2;
+ while (I*BW < Ws) {
+ RC.cat(eXTR(Rt, I*BW, I*BW+BW)).cat(eXTR(Rs, I*BW, I*BW+BW));
+ I += 2;
+ }
+ return RC;
+ };
+
+ // The bitwidth of the 0th operand. In most (if not all) of the
+ // instructions below, the 0th operand is the defined register.
+ // Pre-compute the bitwidth here, because it is needed in many cases
+ // below.
+ uint16_t W0 = (Reg[0].Reg != 0) ?
getRegBitWidth(Reg[0]) : 0; + + switch (Opc) { + // Transfer immediate: + + case A2_tfrsi: + case A2_tfrpi: + case CONST32: + case CONST32_Float_Real: + case CONST32_Int_Real: + case CONST64_Float_Real: + case CONST64_Int_Real: + return rr0(eIMM(im(1), W0), Outputs); + case TFR_PdFalse: + return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::Zero), Outputs); + case TFR_PdTrue: + return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::One), Outputs); + case TFR_FI: { + int FI = op(1).getIndex(); + int Off = op(2).getImm(); + unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off); + unsigned L = Log2_32(A); + RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); + RC.fill(0, L, BT::BitValue::Zero); + return rr0(RC, Outputs); + } + + // Transfer register: + + case A2_tfr: + case A2_tfrp: + case C2_pxfer_map: + return rr0(rc(1), Outputs); + case C2_tfrpr: { + uint16_t RW = W0; + uint16_t PW = 8; // XXX Pred size: getRegBitWidth(Reg[1]); + assert(PW <= RW); + RegisterCell PC = eXTR(rc(1), 0, PW); + RegisterCell RC = RegisterCell(RW).insert(PC, BT::BitMask(0, PW-1)); + RC.fill(PW, RW, BT::BitValue::Zero); + return rr0(RC, Outputs); + } + case C2_tfrrp: { + RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); + W0 = 8; // XXX Pred size + return rr0(eINS(RC, eXTR(rc(1), 0, W0), 0), Outputs); + } + + // Arithmetic: + + case A2_abs: + case A2_absp: + // TODO + break; + + case A2_addsp: { + uint16_t W1 = getRegBitWidth(Reg[1]); + assert(W0 == 64 && W1 == 32); + RegisterCell CW = RegisterCell(W0).insert(rc(1), BT::BitMask(0, W1-1)); + RegisterCell RC = eADD(eSXT(CW, W1), rc(2)); + return rr0(RC, Outputs); + } + case A2_add: + case A2_addp: + return rr0(eADD(rc(1), rc(2)), Outputs); + case A2_addi: + return rr0(eADD(rc(1), eIMM(im(2), W0)), Outputs); + case S4_addi_asl_ri: { + RegisterCell RC = eADD(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_addi_lsr_ri: { + RegisterCell RC = eADD(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_addaddi: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M4_mpyri_addi: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyrr_addi: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(eIMM(im(1), W0), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyri_addr_u2: { + RegisterCell M = eMLS(eIMM(im(2), W0), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyri_addr: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M4_mpyrr_addr: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case S4_subaddi: { + RegisterCell RC = eADD(rc(1), eSUB(eIMM(im(2), W0), rc(3))); + return rr0(RC, Outputs); + } + case M2_accii: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M2_acci: { + RegisterCell RC = eADD(rc(1), eADD(rc(2), rc(3))); + return rr0(RC, Outputs); + } + case M2_subacc: { + RegisterCell RC = eADD(rc(1), eSUB(rc(2), rc(3))); + return rr0(RC, Outputs); + } + case S2_addasl_rrri: { + RegisterCell RC = eADD(rc(1), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case C4_addipc: { + RegisterCell RPC = RegisterCell::self(Reg[0].Reg, W0); + RPC.fill(0, 2, BT::BitValue::Zero); + return rr0(eADD(RPC, 
eIMM(im(2), W0)), Outputs); + } + case A2_sub: + case A2_subp: + return rr0(eSUB(rc(1), rc(2)), Outputs); + case A2_subri: + return rr0(eSUB(eIMM(im(1), W0), rc(2)), Outputs); + case S4_subi_asl_ri: { + RegisterCell RC = eSUB(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_subi_lsr_ri: { + RegisterCell RC = eSUB(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M2_naccii: { + RegisterCell RC = eSUB(rc(1), eADD(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M2_nacci: { + RegisterCell RC = eSUB(rc(1), eADD(rc(2), rc(3))); + return rr0(RC, Outputs); + } + // 32-bit negation is done by "Rd = A2_subri 0, Rs" + case A2_negp: + return rr0(eSUB(eIMM(0, W0), rc(1)), Outputs); + + case M2_mpy_up: { + RegisterCell M = eMLS(rc(1), rc(2)); + return rr0(hi(M, W0), Outputs); + } + case M2_dpmpyss_s0: + return rr0(eMLS(rc(1), rc(2)), Outputs); + case M2_dpmpyss_acc_s0: + return rr0(eADD(rc(1), eMLS(rc(2), rc(3))), Outputs); + case M2_dpmpyss_nac_s0: + return rr0(eSUB(rc(1), eMLS(rc(2), rc(3))), Outputs); + case M2_mpyi: { + RegisterCell M = eMLS(rc(1), rc(2)); + return rr0(lo(M, W0), Outputs); + } + case M2_macsip: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_macsin: { + RegisterCell M = eMLS(rc(2), eIMM(im(3), W0)); + RegisterCell RC = eSUB(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_maci: { + RegisterCell M = eMLS(rc(2), rc(3)); + RegisterCell RC = eADD(rc(1), lo(M, W0)); + return rr0(RC, Outputs); + } + case M2_mpysmi: { + RegisterCell M = eMLS(rc(1), eIMM(im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpysin: { + RegisterCell M = eMLS(rc(1), eIMM(-im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpysip: { + RegisterCell M = eMLS(rc(1), eIMM(im(2), W0)); + return rr0(lo(M, 32), Outputs); + } + case M2_mpyu_up: { + RegisterCell M = eMLU(rc(1), rc(2)); + return rr0(hi(M, W0), Outputs); + } + case M2_dpmpyuu_s0: + return rr0(eMLU(rc(1), rc(2)), Outputs); + case M2_dpmpyuu_acc_s0: + return rr0(eADD(rc(1), eMLU(rc(2), rc(3))), Outputs); + case M2_dpmpyuu_nac_s0: + return rr0(eSUB(rc(1), eMLU(rc(2), rc(3))), Outputs); + //case M2_mpysu_up: + + // Logical/bitwise: + + case A2_andir: + return rr0(eAND(rc(1), eIMM(im(2), W0)), Outputs); + case A2_and: + case A2_andp: + return rr0(eAND(rc(1), rc(2)), Outputs); + case A4_andn: + case A4_andnp: + return rr0(eAND(rc(1), eNOT(rc(2))), Outputs); + case S4_andi_asl_ri: { + RegisterCell RC = eAND(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_andi_lsr_ri: { + RegisterCell RC = eAND(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M4_and_and: + return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_and_andn: + return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case M4_and_or: + return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_and_xor: + return rr0(eAND(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_orir: + return rr0(eORL(rc(1), eIMM(im(2), W0)), Outputs); + case A2_or: + case A2_orp: + return rr0(eORL(rc(1), rc(2)), Outputs); + case A4_orn: + case A4_ornp: + return rr0(eORL(rc(1), eNOT(rc(2))), Outputs); + case S4_ori_asl_ri: { + RegisterCell RC = eORL(eIMM(im(1), W0), eASL(rc(2), im(3))); + return rr0(RC, Outputs); + } + case S4_ori_lsr_ri: { + RegisterCell RC = eORL(eIMM(im(1), W0), eLSR(rc(2), im(3))); + return rr0(RC, Outputs); + } + case M4_or_and: + return 
rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_or_andn: + return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case S4_or_andi: + case S4_or_andix: { + RegisterCell RC = eORL(rc(1), eAND(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case S4_or_ori: { + RegisterCell RC = eORL(rc(1), eORL(rc(2), eIMM(im(3), W0))); + return rr0(RC, Outputs); + } + case M4_or_or: + return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_or_xor: + return rr0(eORL(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_xor: + case A2_xorp: + return rr0(eXOR(rc(1), rc(2)), Outputs); + case M4_xor_and: + return rr0(eXOR(rc(1), eAND(rc(2), rc(3))), Outputs); + case M4_xor_andn: + return rr0(eXOR(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case M4_xor_or: + return rr0(eXOR(rc(1), eORL(rc(2), rc(3))), Outputs); + case M4_xor_xacc: + return rr0(eXOR(rc(1), eXOR(rc(2), rc(3))), Outputs); + case A2_not: + case A2_notp: + return rr0(eNOT(rc(1)), Outputs); + + case S2_asl_i_r: + case S2_asl_i_p: + return rr0(eASL(rc(1), im(2)), Outputs); + case A2_aslh: + return rr0(eASL(rc(1), 16), Outputs); + case S2_asl_i_r_acc: + case S2_asl_i_p_acc: + return rr0(eADD(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_nac: + case S2_asl_i_p_nac: + return rr0(eSUB(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_and: + case S2_asl_i_p_and: + return rr0(eAND(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_or: + case S2_asl_i_p_or: + return rr0(eORL(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_r_xacc: + case S2_asl_i_p_xacc: + return rr0(eXOR(rc(1), eASL(rc(2), im(3))), Outputs); + case S2_asl_i_vh: + case S2_asl_i_vw: + // TODO + break; + + case S2_asr_i_r: + case S2_asr_i_p: + return rr0(eASR(rc(1), im(2)), Outputs); + case A2_asrh: + return rr0(eASR(rc(1), 16), Outputs); + case S2_asr_i_r_acc: + case S2_asr_i_p_acc: + return rr0(eADD(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_nac: + case S2_asr_i_p_nac: + return rr0(eSUB(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_and: + case S2_asr_i_p_and: + return rr0(eAND(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_or: + case S2_asr_i_p_or: + return rr0(eORL(rc(1), eASR(rc(2), im(3))), Outputs); + case S2_asr_i_r_rnd: { + // The input is first sign-extended to 64 bits, then the output + // is truncated back to 32 bits. 
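+ // The rounding itself is ((sext(Rs) >> #u5) + 1) >> 1, computed in the
+ // 64-bit cell below before the low 32 bits are extracted.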
+ assert(W0 == 32); + RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0); + RegisterCell RC = eASR(eADD(eASR(XC, im(2)), eIMM(1, 2*W0)), 1); + return rr0(eXTR(RC, 0, W0), Outputs); + } + case S2_asr_i_r_rnd_goodsyntax: { + int64_t S = im(2); + if (S == 0) + return rr0(rc(1), Outputs); + // Result: S2_asr_i_r_rnd Rs, u5-1 + RegisterCell XC = eSXT(rc(1).cat(eIMM(0, W0)), W0); + RegisterCell RC = eLSR(eADD(eASR(XC, S-1), eIMM(1, 2*W0)), 1); + return rr0(eXTR(RC, 0, W0), Outputs); + } + case S2_asr_r_vh: + case S2_asr_i_vw: + case S2_asr_i_svw_trun: + // TODO + break; + + case S2_lsr_i_r: + case S2_lsr_i_p: + return rr0(eLSR(rc(1), im(2)), Outputs); + case S2_lsr_i_r_acc: + case S2_lsr_i_p_acc: + return rr0(eADD(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_nac: + case S2_lsr_i_p_nac: + return rr0(eSUB(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_and: + case S2_lsr_i_p_and: + return rr0(eAND(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_or: + case S2_lsr_i_p_or: + return rr0(eORL(rc(1), eLSR(rc(2), im(3))), Outputs); + case S2_lsr_i_r_xacc: + case S2_lsr_i_p_xacc: + return rr0(eXOR(rc(1), eLSR(rc(2), im(3))), Outputs); + + case S2_clrbit_i: { + RegisterCell RC = rc(1); + RC[im(2)] = BT::BitValue::Zero; + return rr0(RC, Outputs); + } + case S2_setbit_i: { + RegisterCell RC = rc(1); + RC[im(2)] = BT::BitValue::One; + return rr0(RC, Outputs); + } + case S2_togglebit_i: { + RegisterCell RC = rc(1); + uint16_t BX = im(2); + RC[BX] = RC[BX].is(0) ? BT::BitValue::One + : RC[BX].is(1) ? BT::BitValue::Zero + : BT::BitValue::self(); + return rr0(RC, Outputs); + } + + case A4_bitspliti: { + uint16_t W1 = getRegBitWidth(Reg[1]); + uint16_t BX = im(2); + // Res.uw[1] = Rs[bx+1:], Res.uw[0] = Rs[0:bx] + const BT::BitValue Zero = BT::BitValue::Zero; + RegisterCell RZ = RegisterCell(W0).fill(BX, W1, Zero) + .fill(W1+(W1-BX), W0, Zero); + RegisterCell BF1 = eXTR(rc(1), 0, BX), BF2 = eXTR(rc(1), BX, W1); + RegisterCell RC = eINS(eINS(RZ, BF1, 0), BF2, W1); + return rr0(RC, Outputs); + } + case S4_extract: + case S4_extractp: + case S2_extractu: + case S2_extractup: { + uint16_t Wd = im(2), Of = im(3); + assert(Wd <= W0); + if (Wd == 0) + return rr0(eIMM(0, W0), Outputs); + // If the width extends beyond the register size, pad the register + // with 0 bits. + RegisterCell Pad = (Wd+Of > W0) ? rc(1).cat(eIMM(0, Wd+Of-W0)) : rc(1); + RegisterCell Ext = eXTR(Pad, Of, Wd+Of); + // Ext is short, need to extend it with 0s or sign bit. + RegisterCell RC = RegisterCell(W0).insert(Ext, BT::BitMask(0, Wd-1)); + if (Opc == S2_extractu || Opc == S2_extractup) + return rr0(eZXT(RC, Wd), Outputs); + return rr0(eSXT(RC, Wd), Outputs); + } + case S2_insert: + case S2_insertp: { + uint16_t Wd = im(3), Of = im(4); + assert(Wd < W0 && Of < W0); + // If Wd+Of exceeds W0, the inserted bits are truncated. 
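+ // (For example, inserting an 8-bit field at offset 28 of a 32-bit
+ // register only keeps the low 4 bits of that field.)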
+ if (Wd+Of > W0) + Wd = W0-Of; + if (Wd == 0) + return rr0(rc(1), Outputs); + return rr0(eINS(rc(1), eXTR(rc(2), 0, Wd), Of), Outputs); + } + + // Bit permutations: + + case A2_combineii: + case A4_combineii: + case A4_combineir: + case A4_combineri: + case A2_combinew: + assert(W0 % 2 == 0); + return rr0(cop(2, W0/2).cat(cop(1, W0/2)), Outputs); + case A2_combine_ll: + case A2_combine_lh: + case A2_combine_hl: + case A2_combine_hh: { + assert(W0 == 32); + assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32); + // Low half in the output is 0 for _ll and _hl, 1 otherwise: + unsigned LoH = !(Opc == A2_combine_ll || Opc == A2_combine_hl); + // High half in the output is 0 for _ll and _lh, 1 otherwise: + unsigned HiH = !(Opc == A2_combine_ll || Opc == A2_combine_lh); + RegisterCell R1 = rc(1); + RegisterCell R2 = rc(2); + RegisterCell RC = half(R2, LoH).cat(half(R1, HiH)); + return rr0(RC, Outputs); + } + case S2_packhl: { + assert(W0 == 64); + assert(getRegBitWidth(Reg[1]) == 32 && getRegBitWidth(Reg[2]) == 32); + RegisterCell R1 = rc(1); + RegisterCell R2 = rc(2); + RegisterCell RC = half(R2, 0).cat(half(R1, 0)).cat(half(R2, 1)) + .cat(half(R1, 1)); + return rr0(RC, Outputs); + } + case S2_shuffeb: { + RegisterCell RC = shuffle(rc(1), rc(2), 8, false); + return rr0(RC, Outputs); + } + case S2_shuffeh: { + RegisterCell RC = shuffle(rc(1), rc(2), 16, false); + return rr0(RC, Outputs); + } + case S2_shuffob: { + RegisterCell RC = shuffle(rc(1), rc(2), 8, true); + return rr0(RC, Outputs); + } + case S2_shuffoh: { + RegisterCell RC = shuffle(rc(1), rc(2), 16, true); + return rr0(RC, Outputs); + } + case C2_mask: { + uint16_t WR = W0; + uint16_t WP = 8; // XXX Pred size: getRegBitWidth(Reg[1]); + assert(WR == 64 && WP == 8); + RegisterCell R1 = rc(1); + RegisterCell RC(WR); + for (uint16_t i = 0; i < WP; ++i) { + const BT::BitValue &V = R1[i]; + BT::BitValue F = (V.is(0) || V.is(1)) ? V : BT::BitValue::self(); + RC.fill(i*8, i*8+8, F); + } + return rr0(RC, Outputs); + } + + // Mux: + + case C2_muxii: + case C2_muxir: + case C2_muxri: + case C2_mux: { + BT::BitValue PC0 = rc(1)[0]; + RegisterCell R2 = cop(2, W0); + RegisterCell R3 = cop(3, W0); + if (PC0.is(0) || PC0.is(1)) + return rr0(RegisterCell::ref(PC0 ? R2 : R3), Outputs); + R2.meet(R3, Reg[0].Reg); + return rr0(R2, Outputs); + } + case C2_vmux: + // TODO + break; + + // Sign- and zero-extension: + + case A2_sxtb: + return rr0(eSXT(rc(1), 8), Outputs); + case A2_sxth: + return rr0(eSXT(rc(1), 16), Outputs); + case A2_sxtw: { + uint16_t W1 = getRegBitWidth(Reg[1]); + assert(W0 == 64 && W1 == 32); + RegisterCell RC = eSXT(rc(1).cat(eIMM(0, W1)), W1); + return rr0(RC, Outputs); + } + case A2_zxtb: + return rr0(eZXT(rc(1), 8), Outputs); + case A2_zxth: + return rr0(eZXT(rc(1), 16), Outputs); + + // Bit count: + + case S2_cl0: + case S2_cl0p: + // Always produce a 32-bit result. 
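+      // eCLB counts the number of leading bits equal to the given value.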
+ return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs); + case S2_cl1: + case S2_cl1p: + return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs); + case S2_clb: + case S2_clbp: { + uint16_t W1 = getRegBitWidth(Reg[1]); + RegisterCell R1 = rc(1); + BT::BitValue TV = R1[W1-1]; + if (TV.is(0) || TV.is(1)) + return rr0(eCLB(R1, TV, 32), Outputs); + break; + } + case S2_ct0: + case S2_ct0p: + return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs); + case S2_ct1: + case S2_ct1p: + return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs); + case S5_popcountp: + // TODO + break; + + case C2_all8: { + RegisterCell P1 = rc(1); + bool Has0 = false, All1 = true; + for (uint16_t i = 0; i < 8/*XXX*/; ++i) { + if (!P1[i].is(1)) + All1 = false; + if (!P1[i].is(0)) + continue; + Has0 = true; + break; + } + if (!Has0 && !All1) + break; + RegisterCell RC(W0); + RC.fill(0, W0, (All1 ? BT::BitValue::One : BT::BitValue::Zero)); + return rr0(RC, Outputs); + } + case C2_any8: { + RegisterCell P1 = rc(1); + bool Has1 = false, All0 = true; + for (uint16_t i = 0; i < 8/*XXX*/; ++i) { + if (!P1[i].is(0)) + All0 = false; + if (!P1[i].is(1)) + continue; + Has1 = true; + break; + } + if (!Has1 && !All0) + break; + RegisterCell RC(W0); + RC.fill(0, W0, (Has1 ? BT::BitValue::One : BT::BitValue::Zero)); + return rr0(RC, Outputs); + } + case C2_and: + return rr0(eAND(rc(1), rc(2)), Outputs); + case C2_andn: + return rr0(eAND(rc(1), eNOT(rc(2))), Outputs); + case C2_not: + return rr0(eNOT(rc(1)), Outputs); + case C2_or: + return rr0(eORL(rc(1), rc(2)), Outputs); + case C2_orn: + return rr0(eORL(rc(1), eNOT(rc(2))), Outputs); + case C2_xor: + return rr0(eXOR(rc(1), rc(2)), Outputs); + case C4_and_and: + return rr0(eAND(rc(1), eAND(rc(2), rc(3))), Outputs); + case C4_and_andn: + return rr0(eAND(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case C4_and_or: + return rr0(eAND(rc(1), eORL(rc(2), rc(3))), Outputs); + case C4_and_orn: + return rr0(eAND(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs); + case C4_or_and: + return rr0(eORL(rc(1), eAND(rc(2), rc(3))), Outputs); + case C4_or_andn: + return rr0(eORL(rc(1), eAND(rc(2), eNOT(rc(3)))), Outputs); + case C4_or_or: + return rr0(eORL(rc(1), eORL(rc(2), rc(3))), Outputs); + case C4_or_orn: + return rr0(eORL(rc(1), eORL(rc(2), eNOT(rc(3)))), Outputs); + case C2_bitsclr: + case C2_bitsclri: + case C2_bitsset: + case C4_nbitsclr: + case C4_nbitsclri: + case C4_nbitsset: + // TODO + break; + case S2_tstbit_i: + case S4_ntstbit_i: { + BT::BitValue V = rc(1)[im(2)]; + if (V.is(0) || V.is(1)) { + // If instruction is S2_tstbit_i, test for 1, otherwise test for 0. + bool TV = (Opc == S2_tstbit_i); + BT::BitValue F = V.is(TV) ? BT::BitValue::One : BT::BitValue::Zero; + return rr0(RegisterCell(W0).fill(0, W0, F), Outputs); + } + break; + } + + default: + return MachineEvaluator::evaluate(MI, Inputs, Outputs); + } + #undef im + #undef rc + #undef op + return false; +} + + +bool HexagonEvaluator::evaluate(const MachineInstr *BI, + const CellMapType &Inputs, BranchTargetList &Targets, + bool &FallsThru) const { + // We need to evaluate one branch at a time. TII::AnalyzeBranch checks + // all the branches in a basic block at once, so we cannot use it. + unsigned Opc = BI->getOpcode(); + bool SimpleBranch = false; + bool Negated = false; + switch (Opc) { + case Hexagon::J2_jumpf: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumpfnewpt: + Negated = true; + case Hexagon::J2_jumpt: + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumptnewpt: + // Simple branch: if([!]Pn) jump ... + // i.e. Op0 = predicate, Op1 = branch target. 
+ SimpleBranch = true; + break; + case Hexagon::J2_jump: + Targets.insert(BI->getOperand(0).getMBB()); + FallsThru = false; + return true; + default: + // If the branch is of unknown type, assume that all successors are + // executable. + return false; + } + + if (!SimpleBranch) + return false; + + // BI is a conditional branch if we got here. + RegisterRef PR = BI->getOperand(0); + RegisterCell PC = getCell(PR, Inputs); + const BT::BitValue &Test = PC[0]; + + // If the condition is neither true nor false, then it's unknown. + if (!Test.is(0) && !Test.is(1)) + return false; + + // "Test.is(!Negated)" means "branch condition is true". + if (!Test.is(!Negated)) { + // Condition known to be false. + FallsThru = true; + return true; + } + + Targets.insert(BI->getOperand(1).getMBB()); + FallsThru = false; + return true; +} + + +bool HexagonEvaluator::evaluateLoad(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + if (TII.isPredicated(MI)) + return false; + assert(MI->mayLoad() && "A load that mayn't?"); + unsigned Opc = MI->getOpcode(); + + uint16_t BitNum; + bool SignEx; + using namespace Hexagon; + + switch (Opc) { + default: + return false; + +#if 0 + // memb_fifo + case L2_loadalignb_pbr: + case L2_loadalignb_pcr: + case L2_loadalignb_pi: + // memh_fifo + case L2_loadalignh_pbr: + case L2_loadalignh_pcr: + case L2_loadalignh_pi: + // membh + case L2_loadbsw2_pbr: + case L2_loadbsw2_pci: + case L2_loadbsw2_pcr: + case L2_loadbsw2_pi: + case L2_loadbsw4_pbr: + case L2_loadbsw4_pci: + case L2_loadbsw4_pcr: + case L2_loadbsw4_pi: + // memubh + case L2_loadbzw2_pbr: + case L2_loadbzw2_pci: + case L2_loadbzw2_pcr: + case L2_loadbzw2_pi: + case L2_loadbzw4_pbr: + case L2_loadbzw4_pci: + case L2_loadbzw4_pcr: + case L2_loadbzw4_pi: +#endif + + case L2_loadrbgp: + case L2_loadrb_io: + case L2_loadrb_pbr: + case L2_loadrb_pci: + case L2_loadrb_pcr: + case L2_loadrb_pi: + case L4_loadrb_abs: + case L4_loadrb_ap: + case L4_loadrb_rr: + case L4_loadrb_ur: + BitNum = 8; + SignEx = true; + break; + + case L2_loadrubgp: + case L2_loadrub_io: + case L2_loadrub_pbr: + case L2_loadrub_pci: + case L2_loadrub_pcr: + case L2_loadrub_pi: + case L4_loadrub_abs: + case L4_loadrub_ap: + case L4_loadrub_rr: + case L4_loadrub_ur: + BitNum = 8; + SignEx = false; + break; + + case L2_loadrhgp: + case L2_loadrh_io: + case L2_loadrh_pbr: + case L2_loadrh_pci: + case L2_loadrh_pcr: + case L2_loadrh_pi: + case L4_loadrh_abs: + case L4_loadrh_ap: + case L4_loadrh_rr: + case L4_loadrh_ur: + BitNum = 16; + SignEx = true; + break; + + case L2_loadruhgp: + case L2_loadruh_io: + case L2_loadruh_pbr: + case L2_loadruh_pci: + case L2_loadruh_pcr: + case L2_loadruh_pi: + case L4_loadruh_rr: + case L4_loadruh_abs: + case L4_loadruh_ap: + case L4_loadruh_ur: + BitNum = 16; + SignEx = false; + break; + + case L2_loadrigp: + case L2_loadri_io: + case L2_loadri_pbr: + case L2_loadri_pci: + case L2_loadri_pcr: + case L2_loadri_pi: + case L2_loadw_locked: + case L4_loadri_abs: + case L4_loadri_ap: + case L4_loadri_rr: + case L4_loadri_ur: + case LDriw_pred: + BitNum = 32; + SignEx = true; + break; + + case L2_loadrdgp: + case L2_loadrd_io: + case L2_loadrd_pbr: + case L2_loadrd_pci: + case L2_loadrd_pcr: + case L2_loadrd_pi: + case L4_loadd_locked: + case L4_loadrd_abs: + case L4_loadrd_ap: + case L4_loadrd_rr: + case L4_loadrd_ur: + BitNum = 64; + SignEx = true; + break; + } + + const MachineOperand &MD = MI->getOperand(0); + assert(MD.isReg() && MD.isDef()); + RegisterRef RD = MD; + + uint16_t W = 
getRegBitWidth(RD); + assert(W >= BitNum && BitNum > 0); + RegisterCell Res(W); + + for (uint16_t i = 0; i < BitNum; ++i) + Res[i] = BT::BitValue::self(BT::BitRef(RD.Reg, i)); + + if (SignEx) { + const BT::BitValue &Sign = Res[BitNum-1]; + for (uint16_t i = BitNum; i < W; ++i) + Res[i] = BT::BitValue::ref(Sign); + } else { + for (uint16_t i = BitNum; i < W; ++i) + Res[i] = BT::BitValue::Zero; + } + + putCell(RD, Res, Outputs); + return true; +} + + +bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr *MI, + const CellMapType &Inputs, CellMapType &Outputs) const { + // If MI defines a formal parameter, but is not a copy (loads are handled + // in evaluateLoad), then it's not clear what to do. + assert(MI->isCopy()); + + RegisterRef RD = MI->getOperand(0); + RegisterRef RS = MI->getOperand(1); + assert(RD.Sub == 0); + if (!TargetRegisterInfo::isPhysicalRegister(RS.Reg)) + return false; + RegExtMap::const_iterator F = VRX.find(RD.Reg); + if (F == VRX.end()) + return false; + + uint16_t EW = F->second.Width; + // Store RD's cell into the map. This will associate the cell with a virtual + // register, and make zero-/sign-extends possible (otherwise we would be ex- + // tending "self" bit values, which will have no effect, since "self" values + // cannot be references to anything). + putCell(RD, getCell(RS, Inputs), Outputs); + + RegisterCell Res; + // Read RD's cell from the outputs instead of RS's cell from the inputs: + if (F->second.Type == ExtType::SExt) + Res = eSXT(getCell(RD, Outputs), EW); + else if (F->second.Type == ExtType::ZExt) + Res = eZXT(getCell(RD, Outputs), EW); + + putCell(RD, Res, Outputs); + return true; +} + + +unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { + using namespace Hexagon; + bool Is64 = DoubleRegsRegClass.contains(PReg); + assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg)); + + static const unsigned Phys32[] = { R0, R1, R2, R3, R4, R5 }; + static const unsigned Phys64[] = { D0, D1, D2 }; + const unsigned Num32 = sizeof(Phys32)/sizeof(unsigned); + const unsigned Num64 = sizeof(Phys64)/sizeof(unsigned); + + // Return the first parameter register of the required width. + if (PReg == 0) + return (Width <= 32) ? Phys32[0] : Phys64[0]; + + // Set Idx32, Idx64 in such a way that Idx+1 would give the index of the + // next register. + unsigned Idx32 = 0, Idx64 = 0; + if (!Is64) { + while (Idx32 < Num32) { + if (Phys32[Idx32] == PReg) + break; + Idx32++; + } + Idx64 = Idx32/2; + } else { + while (Idx64 < Num64) { + if (Phys64[Idx64] == PReg) + break; + Idx64++; + } + Idx32 = Idx64*2+1; + } + + if (Width <= 32) + return (Idx32+1 < Num32) ? Phys32[Idx32+1] : 0; + return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0; +} + + +unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { + typedef MachineRegisterInfo::livein_iterator iterator; + for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { + if (I->first == PReg) + return I->second; + } + return 0; +} diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h new file mode 100644 index 000000000000..897af2d71870 --- /dev/null +++ b/lib/Target/Hexagon/HexagonBitTracker.h @@ -0,0 +1,64 @@ +//===--- HexagonBitTracker.h ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGONBITTRACKER_H +#define HEXAGONBITTRACKER_H + +#include "BitTracker.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + class HexagonInstrInfo; + class HexagonRegisterInfo; + +struct HexagonEvaluator : public BitTracker::MachineEvaluator { + typedef BitTracker::CellMapType CellMapType; + typedef BitTracker::RegisterRef RegisterRef; + typedef BitTracker::RegisterCell RegisterCell; + typedef BitTracker::BranchTargetList BranchTargetList; + + HexagonEvaluator(const HexagonRegisterInfo &tri, MachineRegisterInfo &mri, + const HexagonInstrInfo &tii, MachineFunction &mf); + + bool evaluate(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const override; + bool evaluate(const MachineInstr *BI, const CellMapType &Inputs, + BranchTargetList &Targets, bool &FallsThru) const override; + + BitTracker::BitMask mask(unsigned Reg, unsigned Sub) const override; + + MachineFunction &MF; + MachineFrameInfo &MFI; + const HexagonInstrInfo &TII; + +private: + bool evaluateLoad(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + bool evaluateFormalCopy(const MachineInstr *MI, const CellMapType &Inputs, + CellMapType &Outputs) const; + + unsigned getNextPhysReg(unsigned PReg, unsigned Width) const; + unsigned getVirtRegFor(unsigned PReg) const; + + // Type of formal parameter extension. + struct ExtType { + enum { SExt, ZExt }; + char Type; + uint16_t Width; + ExtType() : Type(0), Width(0) {} + ExtType(char t, uint16_t w) : Type(t), Width(w) {} + }; + // Map VR -> extension type. + typedef DenseMap<unsigned, ExtType> RegExtMap; + RegExtMap VRX; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp new file mode 100644 index 000000000000..9f5fac156527 --- /dev/null +++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -0,0 +1,1325 @@ +//===--- HexagonCommonGEP.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "commgep" + +#include "llvm/Pass.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" + +#include <map> +#include <set> +#include <vector> + +#include "HexagonTargetMachine.h" + +using namespace llvm; + +static cl::opt<bool> OptSpeculate("commgep-speculate", cl::init(true), + cl::Hidden, cl::ZeroOrMore); + +static cl::opt<bool> OptEnableInv("commgep-inv", cl::init(true), cl::Hidden, + cl::ZeroOrMore); + +static cl::opt<bool> OptEnableConst("commgep-const", cl::init(true), + cl::Hidden, cl::ZeroOrMore); + +namespace llvm { + void initializeHexagonCommonGEPPass(PassRegistry&); +} + +namespace { + struct GepNode; + typedef std::set<GepNode*> NodeSet; + typedef std::map<GepNode*,Value*> NodeToValueMap; + typedef std::vector<GepNode*> NodeVect; + typedef std::map<GepNode*,NodeVect> NodeChildrenMap; + typedef std::set<Use*> UseSet; + typedef std::map<GepNode*,UseSet> NodeToUsesMap; + + // Numbering map for gep nodes. Used to keep track of ordering for + // gep nodes. + struct NodeNumbering : public std::map<const GepNode*,unsigned> { + }; + + struct NodeOrdering : public NodeNumbering { + NodeOrdering() : LastNum(0) {} +#ifdef _MSC_VER + void special_insert_for_special_msvc(const GepNode *N) +#else + using NodeNumbering::insert; + void insert(const GepNode* N) +#endif + { + insert(std::make_pair(N, ++LastNum)); + } + bool operator() (const GepNode* N1, const GepNode *N2) const { + const_iterator F1 = find(N1), F2 = find(N2); + assert(F1 != end() && F2 != end()); + return F1->second < F2->second; + } + private: + unsigned LastNum; + }; + + + class HexagonCommonGEP : public FunctionPass { + public: + static char ID; + HexagonCommonGEP() : FunctionPass(ID) { + initializeHexagonCommonGEPPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); + virtual const char *getPassName() const { + return "Hexagon Common GEP"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.addPreserved<PostDominatorTree>(); + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + FunctionPass::getAnalysisUsage(AU); + } + + private: + typedef std::map<Value*,GepNode*> ValueToNodeMap; + typedef std::vector<Value*> ValueVect; + typedef std::map<GepNode*,ValueVect> NodeToValuesMap; + + void getBlockTraversalOrder(BasicBlock *Root, ValueVect &Order); + bool isHandledGepForm(GetElementPtrInst *GepI); + void processGepInst(GetElementPtrInst *GepI, ValueToNodeMap &NM); + void collect(); + void common(); + + BasicBlock *recalculatePlacement(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + BasicBlock *recalculatePlacementRec(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + bool isInvariantIn(Value *Val, Loop *L); + bool isInvariantIn(GepNode *Node, Loop *L); + bool 
isInMainPath(BasicBlock *B, Loop *L); + BasicBlock *adjustForInvariance(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + void separateChainForNode(GepNode *Node, Use *U, NodeToValueMap &Loc); + void separateConstantChains(GepNode *Node, NodeChildrenMap &NCM, + NodeToValueMap &Loc); + void computeNodePlacement(NodeToValueMap &Loc); + + Value *fabricateGEP(NodeVect &NA, BasicBlock::iterator At, + BasicBlock *LocB); + void getAllUsersForNode(GepNode *Node, ValueVect &Values, + NodeChildrenMap &NCM); + void materialize(NodeToValueMap &Loc); + + void removeDeadCode(); + + NodeVect Nodes; + NodeToUsesMap Uses; + NodeOrdering NodeOrder; // Node ordering, for deterministic behavior. + SpecificBumpPtrAllocator<GepNode> *Mem; + LLVMContext *Ctx; + LoopInfo *LI; + DominatorTree *DT; + PostDominatorTree *PDT; + Function *Fn; + }; +} + + +char HexagonCommonGEP::ID = 0; +INITIALIZE_PASS_BEGIN(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", + false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP", + false, false) + +namespace { + struct GepNode { + enum { + None = 0, + Root = 0x01, + Internal = 0x02, + Used = 0x04 + }; + + uint32_t Flags; + union { + GepNode *Parent; + Value *BaseVal; + }; + Value *Idx; + Type *PTy; // Type of the pointer operand. + + GepNode() : Flags(0), Parent(0), Idx(0), PTy(0) {} + GepNode(const GepNode *N) : Flags(N->Flags), Idx(N->Idx), PTy(N->PTy) { + if (Flags & Root) + BaseVal = N->BaseVal; + else + Parent = N->Parent; + } + friend raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN); + }; + + + Type *next_type(Type *Ty, Value *Idx) { + // Advance the type. + if (!Ty->isStructTy()) { + Type *NexTy = cast<SequentialType>(Ty)->getElementType(); + return NexTy; + } + // Otherwise it is a struct type. 
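+    // A struct must be indexed with a constant; use it to select the
+    // member type.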
+ ConstantInt *CI = dyn_cast<ConstantInt>(Idx); + assert(CI && "Struct type with non-constant index"); + int64_t i = CI->getValue().getSExtValue(); + Type *NextTy = cast<StructType>(Ty)->getElementType(i); + return NextTy; + } + + + raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN) { + OS << "{ {"; + bool Comma = false; + if (GN.Flags & GepNode::Root) { + OS << "root"; + Comma = true; + } + if (GN.Flags & GepNode::Internal) { + if (Comma) + OS << ','; + OS << "internal"; + Comma = true; + } + if (GN.Flags & GepNode::Used) { + if (Comma) + OS << ','; + OS << "used"; + Comma = true; + } + OS << "} "; + if (GN.Flags & GepNode::Root) + OS << "BaseVal:" << GN.BaseVal->getName() << '(' << GN.BaseVal << ')'; + else + OS << "Parent:" << GN.Parent; + + OS << " Idx:"; + if (ConstantInt *CI = dyn_cast<ConstantInt>(GN.Idx)) + OS << CI->getValue().getSExtValue(); + else if (GN.Idx->hasName()) + OS << GN.Idx->getName(); + else + OS << "<anon> =" << *GN.Idx; + + OS << " PTy:"; + if (GN.PTy->isStructTy()) { + StructType *STy = cast<StructType>(GN.PTy); + if (!STy->isLiteral()) + OS << GN.PTy->getStructName(); + else + OS << "<anon-struct>:" << *STy; + } + else + OS << *GN.PTy; + OS << " }"; + return OS; + } + + + template <typename NodeContainer> + void dump_node_container(raw_ostream &OS, const NodeContainer &S) { + typedef typename NodeContainer::const_iterator const_iterator; + for (const_iterator I = S.begin(), E = S.end(); I != E; ++I) + OS << *I << ' ' << **I << '\n'; + } + + raw_ostream &operator<< (raw_ostream &OS, + const NodeVect &S) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const NodeVect &S) { + dump_node_container(OS, S); + return OS; + } + + + raw_ostream &operator<< (raw_ostream &OS, + const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){ + typedef NodeToUsesMap::const_iterator const_iterator; + for (const_iterator I = M.begin(), E = M.end(); I != E; ++I) { + const UseSet &Us = I->second; + OS << I->first << " -> #" << Us.size() << '{'; + for (UseSet::const_iterator J = Us.begin(), F = Us.end(); J != F; ++J) { + User *R = (*J)->getUser(); + if (R->hasName()) + OS << ' ' << R->getName(); + else + OS << " <?>(" << *R << ')'; + } + OS << " }\n"; + } + return OS; + } + + + struct in_set { + in_set(const NodeSet &S) : NS(S) {} + bool operator() (GepNode *N) const { + return NS.find(N) != NS.end(); + } + private: + const NodeSet &NS; + }; +} + + +inline void *operator new(size_t, SpecificBumpPtrAllocator<GepNode> &A) { + return A.Allocate(); +} + + +void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root, + ValueVect &Order) { + // Compute block ordering for a typical DT-based traversal of the flow + // graph: "before visiting a block, all of its dominators must have been + // visited". + + Order.push_back(Root); + DomTreeNode *DTN = DT->getNode(Root); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I) + getBlockTraversalOrder((*I)->getBlock(), Order); +} + + +bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) { + // No vector GEPs. + if (!GepI->getType()->isPointerTy()) + return false; + // No GEPs without any indices. (Is this possible?) 
+ if (GepI->idx_begin() == GepI->idx_end()) + return false; + return true; +} + + +void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, + ValueToNodeMap &NM) { + DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); + GepNode *N = new (*Mem) GepNode; + Value *PtrOp = GepI->getPointerOperand(); + ValueToNodeMap::iterator F = NM.find(PtrOp); + if (F == NM.end()) { + N->BaseVal = PtrOp; + N->Flags |= GepNode::Root; + } else { + // If PtrOp was a GEP instruction, it must have already been processed. + // The ValueToNodeMap entry for it is the last gep node in the generated + // chain. Link to it here. + N->Parent = F->second; + } + N->PTy = PtrOp->getType(); + N->Idx = *GepI->idx_begin(); + + // Collect the list of users of this GEP instruction. Will add it to the + // last node created for it. + UseSet Us; + for (Value::user_iterator UI = GepI->user_begin(), UE = GepI->user_end(); + UI != UE; ++UI) { + // Check if this gep is used by anything other than other geps that + // we will process. + if (isa<GetElementPtrInst>(*UI)) { + GetElementPtrInst *UserG = cast<GetElementPtrInst>(*UI); + if (isHandledGepForm(UserG)) + continue; + } + Us.insert(&UI.getUse()); + } + Nodes.push_back(N); +#ifdef _MSC_VER + NodeOrder.special_insert_for_special_msvc(N); +#else + NodeOrder.insert(N); +#endif + + // Skip the first index operand, since we only handle 0. This dereferences + // the pointer operand. + GepNode *PN = N; + Type *PtrTy = cast<PointerType>(PtrOp->getType())->getElementType(); + for (User::op_iterator OI = GepI->idx_begin()+1, OE = GepI->idx_end(); + OI != OE; ++OI) { + Value *Op = *OI; + GepNode *Nx = new (*Mem) GepNode; + Nx->Parent = PN; // Link Nx to the previous node. + Nx->Flags |= GepNode::Internal; + Nx->PTy = PtrTy; + Nx->Idx = Op; + Nodes.push_back(Nx); +#ifdef _MSC_VER + NodeOrder.special_insert_for_special_msvc(Nx); +#else + NodeOrder.insert(Nx); +#endif + PN = Nx; + + PtrTy = next_type(PtrTy, Op); + } + + // After last node has been created, update the use information. + if (!Us.empty()) { + PN->Flags |= GepNode::Used; + Uses[PN].insert(Us.begin(), Us.end()); + } + + // Link the last node with the originating GEP instruction. This is to + // help with linking chained GEP instructions. + NM.insert(std::make_pair(GepI, PN)); +} + + +void HexagonCommonGEP::collect() { + // Establish depth-first traversal order of the dominator tree. + ValueVect BO; + getBlockTraversalOrder(Fn->begin(), BO); + + // The creation of gep nodes requires DT-traversal. When processing a GEP + // instruction that uses another GEP instruction as the base pointer, the + // gep node for the base pointer should already exist. 
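+  // NM maps each GEP instruction processed so far to the last node of the
+  // chain created for it.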
+ ValueToNodeMap NM; + for (ValueVect::iterator I = BO.begin(), E = BO.end(); I != E; ++I) { + BasicBlock *B = cast<BasicBlock>(*I); + for (BasicBlock::iterator J = B->begin(), F = B->end(); J != F; ++J) { + if (!isa<GetElementPtrInst>(J)) + continue; + GetElementPtrInst *GepI = cast<GetElementPtrInst>(J); + if (isHandledGepForm(GepI)) + processGepInst(GepI, NM); + } + } + + DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes); +} + + +namespace { + void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM, + NodeVect &Roots) { + typedef NodeVect::const_iterator const_iterator; + for (const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + if (N->Flags & GepNode::Root) { + Roots.push_back(N); + continue; + } + GepNode *PN = N->Parent; + NCM[PN].push_back(N); + } + } + + void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM, NodeSet &Nodes) { + NodeVect Work; + Work.push_back(Root); + Nodes.insert(Root); + + while (!Work.empty()) { + NodeVect::iterator First = Work.begin(); + GepNode *N = *First; + Work.erase(First); + NodeChildrenMap::iterator CF = NCM.find(N); + if (CF != NCM.end()) { + Work.insert(Work.end(), CF->second.begin(), CF->second.end()); + Nodes.insert(CF->second.begin(), CF->second.end()); + } + } + } +} + + +namespace { + typedef std::set<NodeSet> NodeSymRel; + typedef std::pair<GepNode*,GepNode*> NodePair; + typedef std::set<NodePair> NodePairSet; + + const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) { + for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I) + if (I->count(N)) + return &*I; + return 0; + } + + // Create an ordered pair of GepNode pointers. The pair will be used in + // determining equality. The only purpose of the ordering is to eliminate + // duplication due to the commutativity of equality/non-equality. + NodePair node_pair(GepNode *N1, GepNode *N2) { + uintptr_t P1 = uintptr_t(N1), P2 = uintptr_t(N2); + if (P1 <= P2) + return std::make_pair(N1, N2); + return std::make_pair(N2, N1); + } + + unsigned node_hash(GepNode *N) { + // Include everything except flags and parent. + FoldingSetNodeID ID; + ID.AddPointer(N->Idx); + ID.AddPointer(N->PTy); + return ID.ComputeHash(); + } + + bool node_eq(GepNode *N1, GepNode *N2, NodePairSet &Eq, NodePairSet &Ne) { + // Don't cache the result for nodes with different hashes. The hash + // comparison is fast enough. + if (node_hash(N1) != node_hash(N2)) + return false; + + NodePair NP = node_pair(N1, N2); + NodePairSet::iterator FEq = Eq.find(NP); + if (FEq != Eq.end()) + return true; + NodePairSet::iterator FNe = Ne.find(NP); + if (FNe != Ne.end()) + return false; + // Not previously compared. + bool Root1 = N1->Flags & GepNode::Root; + bool Root2 = N2->Flags & GepNode::Root; + NodePair P = node_pair(N1, N2); + // If the Root flag has different values, the nodes are different. + // If both nodes are root nodes, but their base pointers differ, + // they are different. + if (Root1 != Root2 || (Root1 && N1->BaseVal != N2->BaseVal)) { + Ne.insert(P); + return false; + } + // Here the root flags are identical, and for root nodes the + // base pointers are equal, so the root nodes are equal. + // For non-root nodes, compare their parent nodes. + if (Root1 || node_eq(N1->Parent, N2->Parent, Eq, Ne)) { + Eq.insert(P); + return true; + } + return false; + } +} + + +void HexagonCommonGEP::common() { + // The essence of this commoning is finding gep nodes that are equal. + // To do this we need to compare all pairs of nodes. 
To save time, + // first, partition the set of all nodes into sets of potentially equal + // nodes, and then compare pairs from within each partition. + typedef std::map<unsigned,NodeSet> NodeSetMap; + NodeSetMap MaybeEq; + + for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + unsigned H = node_hash(N); + MaybeEq[H].insert(N); + } + + // Compute the equivalence relation for the gep nodes. Use two caches, + // one for equality and the other for non-equality. + NodeSymRel EqRel; // Equality relation (as set of equivalence classes). + NodePairSet Eq, Ne; // Caches. + for (NodeSetMap::iterator I = MaybeEq.begin(), E = MaybeEq.end(); + I != E; ++I) { + NodeSet &S = I->second; + for (NodeSet::iterator NI = S.begin(), NE = S.end(); NI != NE; ++NI) { + GepNode *N = *NI; + // If node already has a class, then the class must have been created + // in a prior iteration of this loop. Since equality is transitive, + // nothing more will be added to that class, so skip it. + if (node_class(N, EqRel)) + continue; + + // Create a new class candidate now. + NodeSet C; + for (NodeSet::iterator NJ = std::next(NI); NJ != NE; ++NJ) + if (node_eq(N, *NJ, Eq, Ne)) + C.insert(*NJ); + // If Tmp is empty, N would be the only element in it. Don't bother + // creating a class for it then. + if (!C.empty()) { + C.insert(N); // Finalize the set before adding it to the relation. + std::pair<NodeSymRel::iterator, bool> Ins = EqRel.insert(C); + (void)Ins; + assert(Ins.second && "Cannot add a class"); + } + } + } + + DEBUG({ + dbgs() << "Gep node equality:\n"; + for (NodePairSet::iterator I = Eq.begin(), E = Eq.end(); I != E; ++I) + dbgs() << "{ " << I->first << ", " << I->second << " }\n"; + + dbgs() << "Gep equivalence classes:\n"; + for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) { + dbgs() << '{'; + const NodeSet &S = *I; + for (NodeSet::const_iterator J = S.begin(), F = S.end(); J != F; ++J) { + if (J != S.begin()) + dbgs() << ','; + dbgs() << ' ' << *J; + } + dbgs() << " }\n"; + } + }); + + + // Create a projection from a NodeSet to the minimal element in it. + typedef std::map<const NodeSet*,GepNode*> ProjMap; + ProjMap PM; + for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) { + const NodeSet &S = *I; + GepNode *Min = *std::min_element(S.begin(), S.end(), NodeOrder); + std::pair<ProjMap::iterator,bool> Ins = PM.insert(std::make_pair(&S, Min)); + (void)Ins; + assert(Ins.second && "Cannot add minimal element"); + + // Update the min element's flags, and user list. + uint32_t Flags = 0; + UseSet &MinUs = Uses[Min]; + for (NodeSet::iterator J = S.begin(), F = S.end(); J != F; ++J) { + GepNode *N = *J; + uint32_t NF = N->Flags; + // If N is used, append all original values of N to the list of + // original values of Min. + if (NF & GepNode::Used) + MinUs.insert(Uses[N].begin(), Uses[N].end()); + Flags |= NF; + } + if (MinUs.empty()) + Uses.erase(Min); + + // The collected flags should include all the flags from the min element. + assert((Min->Flags & Flags) == Min->Flags); + Min->Flags = Flags; + } + + // Commoning: for each non-root gep node, replace "Parent" with the + // selected (minimum) node from the corresponding equivalence class. + // If a given parent does not have an equivalence class, leave it + // unchanged (it means that it's the only element in its class). 
+ for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + if (N->Flags & GepNode::Root) + continue; + const NodeSet *PC = node_class(N->Parent, EqRel); + if (!PC) + continue; + ProjMap::iterator F = PM.find(PC); + if (F == PM.end()) + continue; + // Found a replacement, use it. + GepNode *Rep = F->second; + N->Parent = Rep; + } + + DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes); + + // Finally, erase the nodes that are no longer used. + NodeSet Erase; + for (NodeVect::iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) { + GepNode *N = *I; + const NodeSet *PC = node_class(N, EqRel); + if (!PC) + continue; + ProjMap::iterator F = PM.find(PC); + if (F == PM.end()) + continue; + if (N == F->second) + continue; + // Node for removal. + Erase.insert(*I); + } + NodeVect::iterator NewE = std::remove_if(Nodes.begin(), Nodes.end(), + in_set(Erase)); + Nodes.resize(std::distance(Nodes.begin(), NewE)); + + DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes); +} + + +namespace { + template <typename T> + BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) { + DEBUG({ + dbgs() << "NCD of {"; + for (typename T::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + if (!*I) + continue; + BasicBlock *B = cast<BasicBlock>(*I); + dbgs() << ' ' << B->getName(); + } + dbgs() << " }\n"; + }); + + // Allow null basic blocks in Blocks. In such cases, return 0. + typename T::iterator I = Blocks.begin(), E = Blocks.end(); + if (I == E || !*I) + return 0; + BasicBlock *Dom = cast<BasicBlock>(*I); + while (++I != E) { + BasicBlock *B = cast_or_null<BasicBlock>(*I); + Dom = B ? DT->findNearestCommonDominator(Dom, B) : 0; + if (!Dom) + return 0; + } + DEBUG(dbgs() << "computed:" << Dom->getName() << '\n'); + return Dom; + } + + template <typename T> + BasicBlock *nearest_common_dominatee(DominatorTree *DT, T &Blocks) { + // If two blocks, A and B, dominate a block C, then A dominates B, + // or B dominates A. + typename T::iterator I = Blocks.begin(), E = Blocks.end(); + // Find the first non-null block. + while (I != E && !*I) + ++I; + if (I == E) + return DT->getRoot(); + BasicBlock *DomB = cast<BasicBlock>(*I); + while (++I != E) { + if (!*I) + continue; + BasicBlock *B = cast<BasicBlock>(*I); + if (DT->dominates(B, DomB)) + continue; + if (!DT->dominates(DomB, B)) + return 0; + DomB = B; + } + return DomB; + } + + // Find the first use in B of any value from Values. If no such use, + // return B->end(). + template <typename T> + BasicBlock::iterator first_use_of_in_block(T &Values, BasicBlock *B) { + BasicBlock::iterator FirstUse = B->end(), BEnd = B->end(); + typedef typename T::iterator iterator; + for (iterator I = Values.begin(), E = Values.end(); I != E; ++I) { + Value *V = *I; + // If V is used in a PHI node, the use belongs to the incoming block, + // not the block with the PHI node. In the incoming block, the use + // would be considered as being at the end of it, so it cannot + // influence the position of the first use (which is assumed to be + // at the end to start with). 
+ if (isa<PHINode>(V)) + continue; + if (!isa<Instruction>(V)) + continue; + Instruction *In = cast<Instruction>(V); + if (In->getParent() != B) + continue; + BasicBlock::iterator It = In; + if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd)) + FirstUse = It; + } + return FirstUse; + } + + bool is_empty(const BasicBlock *B) { + return B->empty() || (&*B->begin() == B->getTerminator()); + } +} + + +BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + DEBUG(dbgs() << "Loc for node:" << Node << '\n'); + // Recalculate the placement for Node, assuming that the locations of + // its children in Loc are valid. + // Return 0 if there is no valid placement for Node (for example, it + // uses an index value that is not available at the location required + // to dominate all children, etc.). + + // Find the nearest common dominator for: + // - all users, if the node is used, and + // - all children. + ValueVect Bs; + if (Node->Flags & GepNode::Used) { + // Append all blocks with uses of the original values to the + // block vector Bs. + NodeToUsesMap::iterator UF = Uses.find(Node); + assert(UF != Uses.end() && "Used node with no use information"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) { + Use *U = *I; + User *R = U->getUser(); + if (!isa<Instruction>(R)) + continue; + BasicBlock *PB = isa<PHINode>(R) + ? cast<PHINode>(R)->getIncomingBlock(*U) + : cast<Instruction>(R)->getParent(); + Bs.push_back(PB); + } + } + // Append the location of each child. + NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { + GepNode *CN = *I; + NodeToValueMap::iterator LF = Loc.find(CN); + // If the child is only used in GEP instructions (i.e. is not used in + // non-GEP instructions), the nearest dominator computed for it may + // have been null. In such case it won't have a location available. + if (LF == Loc.end()) + continue; + Bs.push_back(LF->second); + } + } + + BasicBlock *DomB = nearest_common_dominator(DT, Bs); + if (!DomB) + return 0; + // Check if the index used by Node dominates the computed dominator. + Instruction *IdxI = dyn_cast<Instruction>(Node->Idx); + if (IdxI && !DT->dominates(IdxI->getParent(), DomB)) + return 0; + + // Avoid putting nodes into empty blocks. + while (is_empty(DomB)) { + DomTreeNode *N = (*DT)[DomB]->getIDom(); + if (!N) + break; + DomB = N->getBlock(); + } + + // Otherwise, DomB is fine. Update the location map. + Loc[Node] = DomB; + return DomB; +} + + +BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n'); + // Recalculate the placement of Node, after recursively recalculating the + // placements of all its children. 
+ NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) + recalculatePlacementRec(*I, NCM, Loc); + } + BasicBlock *LB = recalculatePlacement(Node, NCM, Loc); + DEBUG(dbgs() << "LocRec end for node:" << Node << '\n'); + return LB; +} + + +bool HexagonCommonGEP::isInvariantIn(Value *Val, Loop *L) { + if (isa<Constant>(Val) || isa<Argument>(Val)) + return true; + Instruction *In = dyn_cast<Instruction>(Val); + if (!In) + return false; + BasicBlock *HdrB = L->getHeader(), *DefB = In->getParent(); + return DT->properlyDominates(DefB, HdrB); +} + + +bool HexagonCommonGEP::isInvariantIn(GepNode *Node, Loop *L) { + if (Node->Flags & GepNode::Root) + if (!isInvariantIn(Node->BaseVal, L)) + return false; + return isInvariantIn(Node->Idx, L); +} + + +bool HexagonCommonGEP::isInMainPath(BasicBlock *B, Loop *L) { + BasicBlock *HB = L->getHeader(); + BasicBlock *LB = L->getLoopLatch(); + // B must post-dominate the loop header or dominate the loop latch. + if (PDT->dominates(B, HB)) + return true; + if (LB && DT->dominates(B, LB)) + return true; + return false; +} + + +namespace { + BasicBlock *preheader(DominatorTree *DT, Loop *L) { + if (BasicBlock *PH = L->getLoopPreheader()) + return PH; + if (!OptSpeculate) + return 0; + DomTreeNode *DN = DT->getNode(L->getHeader()); + if (!DN) + return 0; + return DN->getIDom()->getBlock(); + } +} + + +BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + // Find the "topmost" location for Node: it must be dominated by both, + // its parent (or the BaseVal, if it's a root node), and by the index + // value. + ValueVect Bs; + if (Node->Flags & GepNode::Root) { + if (Instruction *PIn = dyn_cast<Instruction>(Node->BaseVal)) + Bs.push_back(PIn->getParent()); + } else { + Bs.push_back(Loc[Node->Parent]); + } + if (Instruction *IIn = dyn_cast<Instruction>(Node->Idx)) + Bs.push_back(IIn->getParent()); + BasicBlock *TopB = nearest_common_dominatee(DT, Bs); + + // Traverse the loop nest upwards until we find a loop in which Node + // is no longer invariant, or until we get to the upper limit of Node's + // placement. The traversal will also stop when a suitable "preheader" + // cannot be found for a given loop. The "preheader" may actually be + // a regular block outside of the loop (i.e. not guarded), in which case + // the Node will be speculated. + // For nodes that are not in the main path of the containing loop (i.e. + // are not executed in each iteration), do not move them out of the loop. + BasicBlock *LocB = cast_or_null<BasicBlock>(Loc[Node]); + if (LocB) { + Loop *Lp = LI->getLoopFor(LocB); + while (Lp) { + if (!isInvariantIn(Node, Lp) || !isInMainPath(LocB, Lp)) + break; + BasicBlock *NewLoc = preheader(DT, Lp); + if (!NewLoc || !DT->dominates(TopB, NewLoc)) + break; + Lp = Lp->getParentLoop(); + LocB = NewLoc; + } + } + Loc[Node] = LocB; + + // Recursively compute the locations of all children nodes. 
+ NodeChildrenMap::iterator CF = NCM.find(Node); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) + adjustForInvariance(*I, NCM, Loc); + } + return LocB; +} + + +namespace { + struct LocationAsBlock { + LocationAsBlock(const NodeToValueMap &L) : Map(L) {} + const NodeToValueMap ⤅ + }; + + raw_ostream &operator<< (raw_ostream &OS, + const LocationAsBlock &Loc) LLVM_ATTRIBUTE_UNUSED ; + raw_ostream &operator<< (raw_ostream &OS, const LocationAsBlock &Loc) { + for (NodeToValueMap::const_iterator I = Loc.Map.begin(), E = Loc.Map.end(); + I != E; ++I) { + OS << I->first << " -> "; + BasicBlock *B = cast<BasicBlock>(I->second); + OS << B->getName() << '(' << B << ')'; + OS << '\n'; + } + return OS; + } + + inline bool is_constant(GepNode *N) { + return isa<ConstantInt>(N->Idx); + } +} + + +void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U, + NodeToValueMap &Loc) { + User *R = U->getUser(); + DEBUG(dbgs() << "Separating chain for node (" << Node << ") user: " + << *R << '\n'); + BasicBlock *PB = cast<Instruction>(R)->getParent(); + + GepNode *N = Node; + GepNode *C = 0, *NewNode = 0; + while (is_constant(N) && !(N->Flags & GepNode::Root)) { + // XXX if (single-use) dont-replicate; + GepNode *NewN = new (*Mem) GepNode(N); + Nodes.push_back(NewN); + Loc[NewN] = PB; + + if (N == Node) + NewNode = NewN; + NewN->Flags &= ~GepNode::Used; + if (C) + C->Parent = NewN; + C = NewN; + N = N->Parent; + } + if (!NewNode) + return; + + // Move over all uses that share the same user as U from Node to NewNode. + NodeToUsesMap::iterator UF = Uses.find(Node); + assert(UF != Uses.end()); + UseSet &Us = UF->second; + UseSet NewUs; + for (UseSet::iterator I = Us.begin(); I != Us.end(); ) { + User *S = (*I)->getUser(); + UseSet::iterator Nx = std::next(I); + if (S == R) { + NewUs.insert(*I); + Us.erase(I); + } + I = Nx; + } + if (Us.empty()) { + Node->Flags &= ~GepNode::Used; + Uses.erase(UF); + } + + // Should at least have U in NewUs. + NewNode->Flags |= GepNode::Used; + DEBUG(dbgs() << "new node: " << NewNode << " " << *NewNode << '\n'); + assert(!NewUs.empty()); + Uses[NewNode] = NewUs; +} + + +void HexagonCommonGEP::separateConstantChains(GepNode *Node, + NodeChildrenMap &NCM, NodeToValueMap &Loc) { + // First approximation: extract all chains. + NodeSet Ns; + nodes_for_root(Node, NCM, Ns); + + DEBUG(dbgs() << "Separating constant chains for node: " << Node << '\n'); + // Collect all used nodes together with the uses from loads and stores, + // where the GEP node could be folded into the load/store instruction. + NodeToUsesMap FNs; // Foldable nodes. + for (NodeSet::iterator I = Ns.begin(), E = Ns.end(); I != E; ++I) { + GepNode *N = *I; + if (!(N->Flags & GepNode::Used)) + continue; + NodeToUsesMap::iterator UF = Uses.find(N); + assert(UF != Uses.end()); + UseSet &Us = UF->second; + // Loads/stores that use the node N. + UseSet LSs; + for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) { + Use *U = *J; + User *R = U->getUser(); + // We're interested in uses that provide the address. It can happen + // that the value may also be provided via GEP, but we won't handle + // those cases here for now. 
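+      // Keep only the uses in which the node provides the pointer operand
+      // of a load or a store.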
+ if (LoadInst *Ld = dyn_cast<LoadInst>(R)) { + unsigned PtrX = LoadInst::getPointerOperandIndex(); + if (&Ld->getOperandUse(PtrX) == U) + LSs.insert(U); + } else if (StoreInst *St = dyn_cast<StoreInst>(R)) { + unsigned PtrX = StoreInst::getPointerOperandIndex(); + if (&St->getOperandUse(PtrX) == U) + LSs.insert(U); + } + } + // Even if the total use count is 1, separating the chain may still be + // beneficial, since the constant chain may be longer than the GEP alone + // would be (e.g. if the parent node has a constant index and also has + // other children). + if (!LSs.empty()) + FNs.insert(std::make_pair(N, LSs)); + } + + DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs); + + for (NodeToUsesMap::iterator I = FNs.begin(), E = FNs.end(); I != E; ++I) { + GepNode *N = I->first; + UseSet &Us = I->second; + for (UseSet::iterator J = Us.begin(), F = Us.end(); J != F; ++J) + separateChainForNode(N, *J, Loc); + } +} + + +void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) { + // Compute the inverse of the Node.Parent links. Also, collect the set + // of root nodes. + NodeChildrenMap NCM; + NodeVect Roots; + invert_find_roots(Nodes, NCM, Roots); + + // Compute the initial placement determined by the users' locations, and + // the locations of the child nodes. + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + recalculatePlacementRec(*I, NCM, Loc); + + DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc)); + + if (OptEnableInv) { + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + adjustForInvariance(*I, NCM, Loc); + + DEBUG(dbgs() << "Node placement after adjustment for invariance:\n" + << LocationAsBlock(Loc)); + } + if (OptEnableConst) { + for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) + separateConstantChains(*I, NCM, Loc); + } + DEBUG(dbgs() << "Node use information:\n" << Uses); + + // At the moment, there is no further refinement of the initial placement. + // Such a refinement could include splitting the nodes if they are placed + // too far from some of its users. + + DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc)); +} + + +Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, + BasicBlock *LocB) { + DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName() + << " for nodes:\n" << NA); + unsigned Num = NA.size(); + GepNode *RN = NA[0]; + assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root"); + + Value *NewInst = 0; + Value *Input = RN->BaseVal; + Value **IdxList = new Value*[Num+1]; + unsigned nax = 0; + do { + unsigned IdxC = 0; + // If the type of the input of the first node is not a pointer, + // we need to add an artificial i32 0 to the indices (because the + // actual input in the IR will be a pointer). + if (!NA[nax]->PTy->isPointerTy()) { + Type *Int32Ty = Type::getInt32Ty(*Ctx); + IdxList[IdxC++] = ConstantInt::get(Int32Ty, 0); + } + + // Keep adding indices from NA until we have to stop and generate + // an "intermediate" GEP. + while (++nax <= Num) { + GepNode *N = NA[nax-1]; + IdxList[IdxC++] = N->Idx; + if (nax < Num) { + // We have to stop, if the expected type of the output of this node + // is not the same as the input type of the next node. 
+ Type *NextTy = next_type(N->PTy, N->Idx); + if (NextTy != NA[nax]->PTy) + break; + } + } + ArrayRef<Value*> A(IdxList, IdxC); + Type *InpTy = Input->getType(); + Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType(); + NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", At); + DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); + Input = NewInst; + } while (nax <= Num); + + delete[] IdxList; + return NewInst; +} + + +void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values, + NodeChildrenMap &NCM) { + NodeVect Work; + Work.push_back(Node); + + while (!Work.empty()) { + NodeVect::iterator First = Work.begin(); + GepNode *N = *First; + Work.erase(First); + if (N->Flags & GepNode::Used) { + NodeToUsesMap::iterator UF = Uses.find(N); + assert(UF != Uses.end() && "No use information for used node"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) + Values.push_back((*I)->getUser()); + } + NodeChildrenMap::iterator CF = NCM.find(N); + if (CF != NCM.end()) { + NodeVect &Cs = CF->second; + Work.insert(Work.end(), Cs.begin(), Cs.end()); + } + } +} + + +void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { + DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n'); + NodeChildrenMap NCM; + NodeVect Roots; + // Compute the inversion again, since computing placement could alter + // "parent" relation between nodes. + invert_find_roots(Nodes, NCM, Roots); + + while (!Roots.empty()) { + NodeVect::iterator First = Roots.begin(); + GepNode *Root = *First, *Last = *First; + Roots.erase(First); + + NodeVect NA; // Nodes to assemble. + // Append to NA all child nodes up to (and including) the first child + // that: + // (1) has more than 1 child, or + // (2) is used, or + // (3) has a child located in a different block. + bool LastUsed = false; + unsigned LastCN = 0; + // The location may be null if the computation failed (it can legitimately + // happen for nodes created from dead GEPs). + Value *LocV = Loc[Last]; + if (!LocV) + continue; + BasicBlock *LastB = cast<BasicBlock>(LocV); + do { + NA.push_back(Last); + LastUsed = (Last->Flags & GepNode::Used); + if (LastUsed) + break; + NodeChildrenMap::iterator CF = NCM.find(Last); + LastCN = (CF != NCM.end()) ? CF->second.size() : 0; + if (LastCN != 1) + break; + GepNode *Child = CF->second.front(); + BasicBlock *ChildB = cast_or_null<BasicBlock>(Loc[Child]); + if (ChildB != 0 && LastB != ChildB) + break; + Last = Child; + } while (true); + + BasicBlock::iterator InsertAt = LastB->getTerminator(); + if (LastUsed || LastCN > 0) { + ValueVect Urs; + getAllUsersForNode(Root, Urs, NCM); + BasicBlock::iterator FirstUse = first_use_of_in_block(Urs, LastB); + if (FirstUse != LastB->end()) + InsertAt = FirstUse; + } + + // Generate a new instruction for NA. + Value *NewInst = fabricateGEP(NA, InsertAt, LastB); + + // Convert all the children of Last node into roots, and append them + // to the Roots list. + if (LastCN > 0) { + NodeVect &Cs = NCM[Last]; + for (NodeVect::iterator I = Cs.begin(), E = Cs.end(); I != E; ++I) { + GepNode *CN = *I; + CN->Flags &= ~GepNode::Internal; + CN->Flags |= GepNode::Root; + CN->BaseVal = NewInst; + Roots.push_back(CN); + } + } + + // Lastly, if the Last node was used, replace all uses with the new GEP. + // The uses reference the original GEP values. 
+ if (LastUsed) { + NodeToUsesMap::iterator UF = Uses.find(Last); + assert(UF != Uses.end() && "No use information found"); + UseSet &Us = UF->second; + for (UseSet::iterator I = Us.begin(), E = Us.end(); I != E; ++I) { + Use *U = *I; + U->set(NewInst); + } + } + } +} + + +void HexagonCommonGEP::removeDeadCode() { + ValueVect BO; + BO.push_back(&Fn->front()); + + for (unsigned i = 0; i < BO.size(); ++i) { + BasicBlock *B = cast<BasicBlock>(BO[i]); + DomTreeNode *N = DT->getNode(B); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + BO.push_back((*I)->getBlock()); + } + + for (unsigned i = BO.size(); i > 0; --i) { + BasicBlock *B = cast<BasicBlock>(BO[i-1]); + BasicBlock::InstListType &IL = B->getInstList(); + typedef BasicBlock::InstListType::reverse_iterator reverse_iterator; + ValueVect Ins; + for (reverse_iterator I = IL.rbegin(), E = IL.rend(); I != E; ++I) + Ins.push_back(&*I); + for (ValueVect::iterator I = Ins.begin(), E = Ins.end(); I != E; ++I) { + Instruction *In = cast<Instruction>(*I); + if (isInstructionTriviallyDead(In)) + In->eraseFromParent(); + } + } +} + + +bool HexagonCommonGEP::runOnFunction(Function &F) { + // For now bail out on C++ exception handling. + for (Function::iterator A = F.begin(), Z = F.end(); A != Z; ++A) + for (BasicBlock::iterator I = A->begin(), E = A->end(); I != E; ++I) + if (isa<InvokeInst>(I) || isa<LandingPadInst>(I)) + return false; + + Fn = &F; + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + PDT = &getAnalysis<PostDominatorTree>(); + LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + Ctx = &F.getContext(); + + Nodes.clear(); + Uses.clear(); + NodeOrder.clear(); + + SpecificBumpPtrAllocator<GepNode> Allocator; + Mem = &Allocator; + + collect(); + common(); + + NodeToValueMap Loc; + computeNodePlacement(Loc); + materialize(Loc); + removeDeadCode(); + +#ifdef XDEBUG + // Run this only when expensive checks are enabled. + verifyFunction(F); +#endif + return true; +} + + +namespace llvm { + FunctionPass *createHexagonCommonGEP() { + return new HexagonCommonGEP(); + } +} diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index 37ed173a79cd..ce10aeadef94 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -1,3 +1,12 @@ +//===--- HexagonExpandCondsets.cpp ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + // Replace mux instructions with the corresponding legal instructions. // It is meant to work post-SSA, but still on virtual registers. It was // originally placed between register coalescing and machine instruction diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 868f87e18413..29283c81877e 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -864,13 +864,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF, // Check for an unused caller-saved register. 
for ( ; *CallerSavedRegs; ++CallerSavedRegs) { MCPhysReg FreeReg = *CallerSavedRegs; - if (MRI.isPhysRegUsed(FreeReg)) + if (!MRI.reg_nodbg_empty(FreeReg)) continue; // Check aliased register usage. bool IsCurrentRegUsed = false; for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI) - if (MRI.isPhysRegUsed(*AI)) { + if (!MRI.reg_nodbg_empty(*AI)) { IsCurrentRegUsed = true; break; } @@ -959,8 +959,11 @@ bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF) } -void HexagonFrameLowering::processFunctionBeforeCalleeSavedScan( - MachineFunction &MF, RegScavenger* RS) const { +void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget()); auto &HRI = *HST.getRegisterInfo(); @@ -969,11 +972,9 @@ void HexagonFrameLowering::processFunctionBeforeCalleeSavedScan( // If we have a function containing __builtin_eh_return we want to spill and // restore all callee saved registers. Pretend that they are used. if (HasEHReturn) { - MachineRegisterInfo &MRI = MF.getRegInfo(); for (const MCPhysReg *CSRegs = HRI.getCalleeSavedRegs(&MF); *CSRegs; ++CSRegs) - if (!MRI.isPhysRegUsed(*CSRegs)) - MRI.setPhysRegUsed(*CSRegs); + SavedRegs.set(*CSRegs); } const TargetRegisterClass &RC = Hexagon::IntRegsRegClass; diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index 89500cb85724..d39ee2c77195 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -45,7 +45,7 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; bool targetHandlesStackFrameRounding() const override { diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp new file mode 100644 index 000000000000..4d32208bd5aa --- /dev/null +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -0,0 +1,259 @@ +//===--- HexagonGenExtract.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt<unsigned> ExtractCutoff("extract-cutoff", cl::init(~0U), + cl::Hidden, cl::desc("Cutoff for generating \"extract\"" + " instructions")); + +// This prevents generating extract instructions that have the offset of 0. 
+// One of the reasons for "extract" is to put a sequence of bits in a regis- +// ter, starting at offset 0 (so that these bits can then be used by an +// "insert"). If the bits are already at offset 0, it is better not to gene- +// rate "extract", since logical bit operations can be merged into compound +// instructions (as opposed to "extract"). +static cl::opt<bool> NoSR0("extract-nosr0", cl::init(true), cl::Hidden, + cl::desc("No extract instruction with offset 0")); + +static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden, + cl::desc("Require & in extract patterns")); + +namespace llvm { + void initializeHexagonGenExtractPass(PassRegistry&); + FunctionPass *createHexagonGenExtract(); +} + + +namespace { + class HexagonGenExtract : public FunctionPass { + public: + static char ID; + HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) { + initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const override { + return "Hexagon generate \"extract\" instructions"; + } + virtual bool runOnFunction(Function &F) override; + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<MachineFunctionAnalysis>(); + FunctionPass::getAnalysisUsage(AU); + } + private: + bool visitBlock(BasicBlock *B); + bool convert(Instruction *In); + + unsigned ExtractCount; + DominatorTree *DT; + }; + + char HexagonGenExtract::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate " + "\"extract\" instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate " + "\"extract\" instructions", false, false) + + +bool HexagonGenExtract::convert(Instruction *In) { + using namespace PatternMatch; + Value *BF = 0; + ConstantInt *CSL = 0, *CSR = 0, *CM = 0; + BasicBlock *BB = In->getParent(); + LLVMContext &Ctx = BB->getContext(); + bool LogicalSR; + + // (and (shl (lshr x, #sr), #sl), #m) + LogicalSR = true; + bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL)), + m_ConstantInt(CM))); + + if (!Match) { + // (and (shl (ashr x, #sr), #sl), #m) + LogicalSR = false; + Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL)), + m_ConstantInt(CM))); + } + if (!Match) { + // (and (shl x, #sl), #m) + LogicalSR = true; + CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)), + m_ConstantInt(CM))); + if (Match && NoSR0) + return false; + } + if (!Match) { + // (and (lshr x, #sr), #m) + LogicalSR = true; + CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CM))); + } + if (!Match) { + // (and (ashr x, #sr), #m) + LogicalSR = false; + CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CM))); + } + if (!Match) { + CM = 0; + // (shl (lshr x, #sr), #sl) + LogicalSR = true; + Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL))); + } + if (!Match) { + CM = 0; + // (shl (ashr x, #sr), #sl) + LogicalSR = false; + Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)), + m_ConstantInt(CSL))); + } + if (!Match) + return false; + + Type *Ty = BF->getType(); + if (!Ty->isIntegerTy()) + 
return false; + unsigned BW = Ty->getPrimitiveSizeInBits(); + if (BW != 32 && BW != 64) + return false; + + uint32_t SR = CSR->getZExtValue(); + uint32_t SL = CSL->getZExtValue(); + + if (!CM) { + // If there was no and, and the shift left did not remove all potential + // sign bits created by the shift right, then extractu cannot reproduce + // this value. + if (!LogicalSR && (SR > SL)) + return false; + APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL); + CM = ConstantInt::get(Ctx, A); + } + + // CM is the shifted-left mask. Shift it back right to remove the zero + // bits on least-significant positions. + APInt M = CM->getValue().lshr(SL); + uint32_t T = M.countTrailingOnes(); + + // During the shifts some of the bits will be lost. Calculate how many + // of the original value will remain after shift right and then left. + uint32_t U = BW - std::max(SL, SR); + // The width of the extracted field is the minimum of the original bits + // that remain after the shifts and the number of contiguous 1s in the mask. + uint32_t W = std::min(U, T); + if (W == 0) + return false; + + // Check if the extracted bits are contained within the mask that it is + // and-ed with. The extract operation will copy these bits, and so the + // mask cannot any holes in it that would clear any of the bits of the + // extracted field. + if (!LogicalSR) { + // If the shift right was arithmetic, it could have included some 1 bits. + // It is still ok to generate extract, but only if the mask eliminates + // those bits (i.e. M does not have any bits set beyond U). + APInt C = APInt::getHighBitsSet(BW, BW-U); + if (M.intersects(C) || !APIntOps::isMask(W, M)) + return false; + } else { + // Check if M starts with a contiguous sequence of W times 1 bits. Get + // the low U bits of M (which eliminates the 0 bits shifted in on the + // left), and check if the result is APInt's "mask": + if (!APIntOps::isMask(W, M.getLoBits(U))) + return false; + } + + IRBuilder<> IRB(BB, In); + Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu + : Intrinsic::hexagon_S2_extractup; + Module *Mod = BB->getParent()->getParent(); + Value *ExtF = Intrinsic::getDeclaration(Mod, IntId); + Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)}); + if (SL != 0) + NewIn = IRB.CreateShl(NewIn, SL, CSL->getName()); + In->replaceAllUsesWith(NewIn); + return true; +} + + +bool HexagonGenExtract::visitBlock(BasicBlock *B) { + // Depth-first, bottom-up traversal. + DomTreeNode *DTN = DT->getNode(B); + typedef GraphTraits<DomTreeNode*> GTN; + typedef GTN::ChildIteratorType Iter; + for (Iter I = GTN::child_begin(DTN), E = GTN::child_end(DTN); I != E; ++I) + visitBlock((*I)->getBlock()); + + // Allow limiting the number of generated extracts for debugging purposes. + bool HasCutoff = ExtractCutoff.getPosition(); + unsigned Cutoff = ExtractCutoff; + + bool Changed = false; + BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin(); + while (true) { + if (HasCutoff && (ExtractCount >= Cutoff)) + return Changed; + bool Last = (I == Begin); + if (!Last) + NextI = std::prev(I); + Instruction *In = &*I; + bool Done = convert(In); + if (HasCutoff && Done) + ExtractCount++; + Changed |= Done; + if (Last) + break; + I = NextI; + } + return Changed; +} + + +bool HexagonGenExtract::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + bool Changed; + + // Traverse the function bottom-up, to see super-expressions before their + // sub-expressions. 
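+  // (visitBlock recurses into the dominated blocks first, and then scans
+  // each block from its last instruction backwards, so the outermost
+  // operation of an expression tree is examined before the operations that
+  // feed it.)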
+ BasicBlock *Entry = GraphTraits<Function*>::getEntryNode(&F); + Changed = visitBlock(Entry); + + return Changed; +} + + +FunctionPass *llvm::createHexagonGenExtract() { + return new HexagonGenExtract(); +} diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp new file mode 100644 index 000000000000..096da949e77b --- /dev/null +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -0,0 +1,1598 @@ +//===--- HexagonGenInsert.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexinsert" + +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include "Hexagon.h" +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" +#include "HexagonBitTracker.h" + +#include <map> +#include <vector> + +using namespace llvm; + +static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), + cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation.")); +// The distance cutoff is selected based on the precheckin-perf results: +// cutoffs 20, 25, 35, and 40 are worse than 30. +static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U), + cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " + "generation.")); + +static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); +static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " + "generation")); + +static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, + cl::ZeroOrMore); +static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, + cl::ZeroOrMore); +// Whether to construct constant values via "insert". Could eliminate constant +// extenders, but often not practical. +static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden, + cl::ZeroOrMore); + +namespace { + // The preprocessor gets confused when the DEBUG macro is passed larger + // chunks of code. Use this function to detect debugging. + inline bool isDebug() { +#ifndef NDEBUG + return ::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE); +#else + return false; +#endif + } +} + + +namespace { + // Set of virtual registers, based on BitVector. 
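+  // Bit positions correspond to virtual register indices
+  // (TargetRegisterInfo::virtReg2Index); find_first/find_next return the
+  // virtual register itself, or 0 once the set is exhausted (0 is a safe
+  // sentinel, since it is never a valid virtual register number).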
+ struct RegisterSet : private BitVector { + RegisterSet() : BitVector() {} + explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} + RegisterSet(const RegisterSet &RS) : BitVector(RS) {} + + using BitVector::clear; + + unsigned find_first() const { + int First = BitVector::find_first(); + if (First < 0) + return 0; + return x2v(First); + } + + unsigned find_next(unsigned Prev) const { + int Next = BitVector::find_next(v2x(Prev)); + if (Next < 0) + return 0; + return x2v(Next); + } + + RegisterSet &insert(unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return static_cast<RegisterSet&>(BitVector::set(Idx)); + } + RegisterSet &remove(unsigned R) { + unsigned Idx = v2x(R); + if (Idx >= size()) + return *this; + return static_cast<RegisterSet&>(BitVector::reset(Idx)); + } + + RegisterSet &insert(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); + } + RegisterSet &remove(const RegisterSet &Rs) { + return static_cast<RegisterSet&>(BitVector::reset(Rs)); + } + + reference operator[](unsigned R) { + unsigned Idx = v2x(R); + ensure(Idx); + return BitVector::operator[](Idx); + } + bool operator[](unsigned R) const { + unsigned Idx = v2x(R); + assert(Idx < size()); + return BitVector::operator[](Idx); + } + bool has(unsigned R) const { + unsigned Idx = v2x(R); + if (Idx >= size()) + return false; + return BitVector::test(Idx); + } + + bool empty() const { + return !BitVector::any(); + } + bool includes(const RegisterSet &Rs) const { + // A.BitVector::test(B) <=> A-B != {} + return !Rs.BitVector::test(*this); + } + bool intersects(const RegisterSet &Rs) const { + return BitVector::anyCommon(Rs); + } + + private: + void ensure(unsigned Idx) { + if (size() <= Idx) + resize(std::max(Idx+1, 32U)); + } + static inline unsigned v2x(unsigned v) { + return TargetRegisterInfo::virtReg2Index(v); + } + static inline unsigned x2v(unsigned x) { + return TargetRegisterInfo::index2VirtReg(x); + } + }; + + + struct PrintRegSet { + PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) + : RS(S), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, + const PrintRegSet &P); + private: + const RegisterSet &RS; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { + OS << '{'; + for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) + OS << ' ' << PrintReg(R, P.TRI); + OS << " }"; + return OS; + } +} + + +namespace { + // A convenience class to associate unsigned numbers (such as virtual + // registers) with unsigned numbers. + struct UnsignedMap : public DenseMap<unsigned,unsigned> { + UnsignedMap() : BaseType() {} + private: + typedef DenseMap<unsigned,unsigned> BaseType; + }; + + // A utility to establish an ordering between virtual registers: + // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB] + // This is meant as a cache for the ordering of virtual registers defined + // by a potentially expensive comparison function, or obtained by a proce- + // dure that should not be repeated each time two registers are compared. + struct RegisterOrdering : public UnsignedMap { + RegisterOrdering() : UnsignedMap() {} + unsigned operator[](unsigned VR) const { + const_iterator F = find(VR); + assert(F != end()); + return F->second; + } + // Add operator(), so that objects of this class can be used as + // comparators in std::sort et al. 
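+    // For example, a std::vector<unsigned> of virtual registers VRs can be
+    // sorted by this ordering with std::sort(VRs.begin(), VRs.end(), Ord),
+    // where Ord is the RegisterOrdering object.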
+ bool operator() (unsigned VR1, unsigned VR2) const { + return operator[](VR1) < operator[](VR2); + } + }; +} + + +namespace { + // Ordering of bit values. This class does not have operator[], but + // is supplies a comparison operator() for use in std:: algorithms. + // The order is as follows: + // - 0 < 1 < ref + // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg), + // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos. + struct BitValueOrdering { + BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {} + bool operator() (const BitTracker::BitValue &V1, + const BitTracker::BitValue &V2) const; + const RegisterOrdering &BaseOrd; + }; +} + + +bool BitValueOrdering::operator() (const BitTracker::BitValue &V1, + const BitTracker::BitValue &V2) const { + if (V1 == V2) + return false; + // V1==0 => true, V2==0 => false + if (V1.is(0) || V2.is(0)) + return V1.is(0); + // Neither of V1,V2 is 0, and V1!=V2. + // V2==1 => false, V1==1 => true + if (V2.is(1) || V1.is(1)) + return !V2.is(1); + // Both V1,V2 are refs. + unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg]; + if (Ind1 != Ind2) + return Ind1 < Ind2; + // If V1.Pos==V2.Pos + assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different"); + return V1.RefI.Pos < V2.RefI.Pos; +} + + +namespace { + // Cache for the BitTracker's cell map. Map lookup has a logarithmic + // complexity, this class will memoize the lookup results to reduce + // the access time for repeated lookups of the same cell. + struct CellMapShadow { + CellMapShadow(const BitTracker &T) : BT(T) {} + const BitTracker::RegisterCell &lookup(unsigned VR) { + unsigned RInd = TargetRegisterInfo::virtReg2Index(VR); + // Grow the vector to at least 32 elements. + if (RInd >= CVect.size()) + CVect.resize(std::max(RInd+16, 32U), 0); + const BitTracker::RegisterCell *CP = CVect[RInd]; + if (CP == 0) + CP = CVect[RInd] = &BT.lookup(VR); + return *CP; + } + + const BitTracker &BT; + + private: + typedef std::vector<const BitTracker::RegisterCell*> CellVectType; + CellVectType CVect; + }; +} + + +namespace { + // Comparator class for lexicographic ordering of virtual registers + // according to the corresponding BitTracker::RegisterCell objects. + struct RegisterCellLexCompare { + RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M) + : BitOrd(BO), CM(M) {} + bool operator() (unsigned VR1, unsigned VR2) const; + private: + const BitValueOrdering &BitOrd; + CellMapShadow &CM; + }; + + // Comparator class for lexicographic ordering of virtual registers + // according to the specified bits of the corresponding BitTracker:: + // RegisterCell objects. + // Specifically, this class will be used to compare bit B of a register + // cell for a selected virtual register R with bit N of any register + // other than R. + struct RegisterCellBitCompareSel { + RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N, + const BitValueOrdering &BO, CellMapShadow &M) + : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {} + bool operator() (unsigned VR1, unsigned VR2) const; + private: + const unsigned SelR, SelB; + const unsigned BitN; + const BitValueOrdering &BitOrd; + CellMapShadow &CM; + }; +} + + +bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { + // Ordering of registers, made up from two given orderings: + // - the ordering of the register numbers, and + // - the ordering of register cells. + // Def. R1 < R2 if: + // - cell(R1) < cell(R2), or + // - cell(R1) == cell(R2), and index(R1) < index(R2). 
+ // + // For register cells, the ordering is lexicographic, with index 0 being + // the most significant. + if (VR1 == VR2) + return false; + + const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2); + uint16_t W1 = RC1.width(), W2 = RC2.width(); + for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) { + const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i]; + if (V1 != V2) + return BitOrd(V1, V2); + } + // Cells are equal up until the common length. + if (W1 != W2) + return W1 < W2; + + return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2]; +} + + +bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { + if (VR1 == VR2) + return false; + const BitTracker::RegisterCell &RC1 = CM.lookup(VR1); + const BitTracker::RegisterCell &RC2 = CM.lookup(VR2); + uint16_t W1 = RC1.width(), W2 = RC2.width(); + uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN; + uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN; + // If Bit1 exceeds the width of VR1, then: + // - return false, if at the same time Bit2 exceeds VR2, or + // - return true, otherwise. + // (I.e. "a bit value that does not exist is less than any bit value + // that does exist".) + if (W1 <= Bit1) + return Bit2 < W2; + // If Bit1 is within VR1, but Bit2 is not within VR2, return false. + if (W2 <= Bit2) + return false; + + const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2]; + if (V1 != V2) + return BitOrd(V1, V2); + return false; +} + + +namespace { + class OrderedRegisterList { + typedef std::vector<unsigned> ListType; + public: + OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {} + void insert(unsigned VR); + void remove(unsigned VR); + unsigned operator[](unsigned Idx) const { + assert(Idx < Seq.size()); + return Seq[Idx]; + } + unsigned size() const { + return Seq.size(); + } + + typedef ListType::iterator iterator; + typedef ListType::const_iterator const_iterator; + iterator begin() { return Seq.begin(); } + iterator end() { return Seq.end(); } + const_iterator begin() const { return Seq.begin(); } + const_iterator end() const { return Seq.end(); } + + // Convenience function to convert an iterator to the corresponding index. + unsigned idx(iterator It) const { return It-begin(); } + private: + ListType Seq; + const RegisterOrdering &Ord; + }; + + + struct PrintORL { + PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI) + : RL(L), TRI(RI) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P); + private: + const OrderedRegisterList &RL; + const TargetRegisterInfo *TRI; + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) { + OS << '('; + OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end(); + for (OrderedRegisterList::const_iterator I = B; I != E; ++I) { + if (I != B) + OS << ", "; + OS << PrintReg(*I, P.TRI); + } + OS << ')'; + return OS; + } +} + + +void OrderedRegisterList::insert(unsigned VR) { + iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); + if (L == Seq.end()) + Seq.push_back(VR); + else + Seq.insert(L, VR); +} + + +void OrderedRegisterList::remove(unsigned VR) { + iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); + assert(L != Seq.end()); + Seq.erase(L); +} + + +namespace { + // A record of the insert form. The fields correspond to the operands + // of the "insert" instruction: + // ... 
= insert(SrcR, InsR, #Wdh, #Off) + struct IFRecord { + IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0) + : SrcR(SR), InsR(IR), Wdh(W), Off(O) {} + unsigned SrcR, InsR; + uint16_t Wdh, Off; + }; + + struct PrintIFR { + PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI) + : IFR(R), TRI(RI) {} + private: + const IFRecord &IFR; + const TargetRegisterInfo *TRI; + friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); + }; + + raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) { + unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR; + OS << '(' << PrintReg(SrcR, P.TRI) << ',' << PrintReg(InsR, P.TRI) + << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')'; + return OS; + } + + typedef std::pair<IFRecord,RegisterSet> IFRecordWithRegSet; +} + + +namespace llvm { + void initializeHexagonGenInsertPass(PassRegistry&); + FunctionPass *createHexagonGenInsert(); +} + + +namespace { + class HexagonGenInsert : public MachineFunctionPass { + public: + static char ID; + HexagonGenInsert() : MachineFunctionPass(ID), HII(0), HRI(0) { + initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon generate \"insert\" instructions"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef DenseMap<std::pair<unsigned,unsigned>,unsigned> PairMapType; + + void buildOrderingMF(RegisterOrdering &RO) const; + void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const; + bool isIntClass(const TargetRegisterClass *RC) const; + bool isConstant(unsigned VR) const; + bool isSmallConstant(unsigned VR) const; + bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR, + uint16_t L, uint16_t S) const; + bool findSelfReference(unsigned VR) const; + bool findNonSelfReference(unsigned VR) const; + void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const; + void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const; + unsigned distance(const MachineBasicBlock *FromB, + const MachineBasicBlock *ToB, const UnsignedMap &RPO, + PairMapType &M) const; + unsigned distance(MachineBasicBlock::const_iterator FromI, + MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, + PairMapType &M) const; + bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs); + void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs); + void findRemovableRegisters(unsigned VR, IFRecord IF, + RegisterSet &RMs) const; + void computeRemovableRegisters(); + + void pruneEmptyLists(); + void pruneCoveredSets(unsigned VR); + void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M); + void pruneRegCopies(unsigned VR); + void pruneCandidates(); + void selectCandidates(); + bool generateInserts(); + + bool removeDeadCode(MachineDomTreeNode *N); + + // IFRecord coupled with a set of potentially removable registers: + typedef std::vector<IFRecordWithRegSet> IFListType; + typedef DenseMap<unsigned,IFListType> IFMapType; // vreg -> IFListType + + void dump_map() const; + + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + + MachineFunction *MFN; + MachineRegisterInfo *MRI; + MachineDominatorTree *MDT; + CellMapShadow *CMS; + + RegisterOrdering BaseOrd; + RegisterOrdering CellOrd; + IFMapType IFMap; + }; + + char HexagonGenInsert::ID = 0; +} + 
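+// For reference, the operation named by an IFRecord,
+//   DstR = insert(SrcR, InsR, #Wdh, #Off),
+// takes the Wdh least significant bits of InsR and places them into the
+// result at bit position Off, with every other bit coming from SrcR. A rough
+// model on plain 64-bit values (an illustration, not code used by the pass):
+//
+//   uint64_t insertBits(uint64_t SrcR, uint64_t InsR, unsigned Wdh,
+//                       unsigned Off) {
+//     uint64_t Mask = (Wdh >= 64) ? ~0ULL : ((1ULL << Wdh) - 1);
+//     return (SrcR & ~(Mask << Off)) | ((InsR & Mask) << Off);
+//   }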
+ +void HexagonGenInsert::dump_map() const { + typedef IFMapType::const_iterator iterator; + for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + dbgs() << " " << PrintReg(I->first, HRI) << ":\n"; + const IFListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", " + << PrintRegSet(LL[i].second, HRI) << '\n'; + } +} + + +void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { + unsigned Index = 0; + typedef MachineFunction::const_iterator mf_iterator; + for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) { + const MachineBasicBlock &B = *A; + if (!CMS->BT.reached(&B)) + continue; + typedef MachineBasicBlock::const_iterator mb_iterator; + for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) { + const MachineInstr *MI = &*I; + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned R = MO.getReg(); + assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); + if (TargetRegisterInfo::isVirtualRegister(R)) + RO.insert(std::make_pair(R, Index++)); + } + } + } + } + // Since some virtual registers may have had their def and uses eliminated, + // they are no longer referenced in the code, and so they will not appear + // in the map. +} + + +void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, + RegisterOrdering &RO) const { + // Create a vector of all virtual registers (collect them from the base + // ordering RB), and then sort it using the RegisterCell comparator. + BitValueOrdering BVO(RB); + RegisterCellLexCompare LexCmp(BVO, *CMS); + typedef std::vector<unsigned> SortableVectorType; + SortableVectorType VRs; + for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I) + VRs.push_back(I->first); + std::sort(VRs.begin(), VRs.end(), LexCmp); + // Transfer the results to the outgoing register ordering. + for (unsigned i = 0, n = VRs.size(); i < n; ++i) + RO.insert(std::make_pair(VRs[i], i)); +} + + +inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const { + return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass; +} + + +bool HexagonGenInsert::isConstant(unsigned VR) const { + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + uint16_t W = RC.width(); + for (uint16_t i = 0; i < W; ++i) { + const BitTracker::BitValue &BV = RC[i]; + if (BV.is(0) || BV.is(1)) + continue; + return false; + } + return true; +} + + +bool HexagonGenInsert::isSmallConstant(unsigned VR) const { + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + uint16_t W = RC.width(); + if (W > 64) + return false; + uint64_t V = 0, B = 1; + for (uint16_t i = 0; i < W; ++i) { + const BitTracker::BitValue &BV = RC[i]; + if (BV.is(1)) + V |= B; + else if (!BV.is(0)) + return false; + B <<= 1; + } + + // For 32-bit registers, consider: Rd = #s16. + if (W == 32) + return isInt<16>(V); + + // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8) + return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V)); +} + + +bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR, + unsigned InsR, uint16_t L, uint16_t S) const { + const TargetRegisterClass *DstRC = MRI->getRegClass(DstR); + const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR); + const TargetRegisterClass *InsRC = MRI->getRegClass(InsR); + // Only integet (32-/64-bit) register classes. 
+ if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC)) + return false; + // The "source" register must be of the same class as DstR. + if (DstRC != SrcRC) + return false; + if (DstRC == InsRC) + return true; + // A 64-bit register can only be generated from other 64-bit registers. + if (DstRC == &Hexagon::DoubleRegsRegClass) + return false; + // Otherwise, the L and S cannot span 32-bit word boundary. + if (S < 32 && S+L > 32) + return false; + return true; +} + + +bool HexagonGenInsert::findSelfReference(unsigned VR) const { + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR) + return true; + } + return false; +} + + +bool HexagonGenInsert::findNonSelfReference(unsigned VR) const { + BitTracker::RegisterCell RC = CMS->lookup(VR); + for (uint16_t i = 0, w = RC.width(); i < w; ++i) { + const BitTracker::BitValue &V = RC[i]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR) + return true; + } + return false; +} + + +void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, + RegisterSet &Defs) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Defs.insert(R); + } +} + + +void HexagonGenInsert::getInstrUses(const MachineInstr *MI, + RegisterSet &Uses) const { + for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned R = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + Uses.insert(R); + } +} + + +unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, + const MachineBasicBlock *ToB, const UnsignedMap &RPO, + PairMapType &M) const { + // Forward distance from the end of a block to the beginning of it does + // not make sense. This function should not be called with FromB == ToB. + assert(FromB != ToB); + + unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber(); + // If we have already computed it, return the cached result. + PairMapType::iterator F = M.find(std::make_pair(FromN, ToN)); + if (F != M.end()) + return F->second; + unsigned ToRPO = RPO.lookup(ToN); + + unsigned MaxD = 0; + typedef MachineBasicBlock::const_pred_iterator pred_iterator; + for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) { + const MachineBasicBlock *PB = *I; + // Skip back edges. Also, if FromB is a predecessor of ToB, the distance + // along that path will be 0, and we don't need to do any calculations + // on it. + if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO) + continue; + unsigned D = PB->size() + distance(FromB, PB, RPO, M); + if (D > MaxD) + MaxD = D; + } + + // Memoize the result for later lookup. 
+ M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD)); + return MaxD; +} + + +unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, + MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, + PairMapType &M) const { + const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent(); + if (FB == TB) + return std::distance(FromI, ToI); + unsigned D1 = std::distance(TB->begin(), ToI); + unsigned D2 = distance(FB, TB, RPO, M); + unsigned D3 = std::distance(FromI, FB->end()); + return D1+D2+D3; +} + + +bool HexagonGenInsert::findRecordInsertForms(unsigned VR, + OrderedRegisterList &AVs) { + if (isDebug()) { + dbgs() << LLVM_FUNCTION_NAME << ": " << PrintReg(VR, HRI) + << " AVs: " << PrintORL(AVs, HRI) << "\n"; + } + if (AVs.size() == 0) + return false; + + typedef OrderedRegisterList::iterator iterator; + BitValueOrdering BVO(BaseOrd); + const BitTracker::RegisterCell &RC = CMS->lookup(VR); + uint16_t W = RC.width(); + + typedef std::pair<unsigned,uint16_t> RSRecord; // (reg,shift) + typedef std::vector<RSRecord> RSListType; + // Have a map, with key being the matching prefix length, and the value + // being the list of pairs (R,S), where R's prefix matches VR at S. + // (DenseMap<uint16_t,RSListType> fails to instantiate.) + typedef DenseMap<unsigned,RSListType> LRSMapType; + LRSMapType LM; + + // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S, + // and find matching prefixes from AVs with the rotated RC. Such a prefix + // would match a string of bits (of length L) in RC starting at S. + for (uint16_t S = 0; S < W; ++S) { + iterator B = AVs.begin(), E = AVs.end(); + // The registers in AVs are ordered according to the lexical order of + // the corresponding register cells. This means that the range of regis- + // ters in AVs that match a prefix of length L+1 will be contained in + // the range that matches a prefix of length L. This means that we can + // keep narrowing the search space as the prefix length goes up. This + // helps reduce the overall complexity of the search. + uint16_t L; + for (L = 0; L < W-S; ++L) { + // Compare against VR's bits starting at S, which emulates rotation + // of VR by S. + RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS); + iterator NewB = std::lower_bound(B, E, VR, RCB); + iterator NewE = std::upper_bound(NewB, E, VR, RCB); + // For the registers that are eliminated from the next range, L is + // the longest prefix matching VR at position S (their prefixes + // differ from VR at S+L). If L>0, record this information for later + // use. + if (L > 0) { + for (iterator I = B; I != NewB; ++I) + LM[L].push_back(std::make_pair(*I, S)); + for (iterator I = NewE; I != E; ++I) + LM[L].push_back(std::make_pair(*I, S)); + } + B = NewB, E = NewE; + if (B == E) + break; + } + // Record the final register range. If this range is non-empty, then + // L=W-S. + assert(B == E || L == W-S); + if (B != E) { + for (iterator I = B; I != E; ++I) + LM[L].push_back(std::make_pair(*I, S)); + // If B!=E, then we found a range of registers whose prefixes cover the + // rest of VR from position S. There is no need to further advance S. 
+ break; + } + } + + if (isDebug()) { + dbgs() << "Prefixes matching register " << PrintReg(VR, HRI) << "\n"; + for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) { + dbgs() << " L=" << I->first << ':'; + const RSListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + dbgs() << " (" << PrintReg(LL[i].first, HRI) << ",@" + << LL[i].second << ')'; + dbgs() << '\n'; + } + } + + + bool Recorded = false; + + for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) { + unsigned SrcR = *I; + int FDi = -1, LDi = -1; // First/last different bit. + const BitTracker::RegisterCell &AC = CMS->lookup(SrcR); + uint16_t AW = AC.width(); + for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) { + if (RC[i] == AC[i]) + continue; + if (FDi == -1) + FDi = i; + LDi = i; + } + if (FDi == -1) + continue; // TODO (future): Record identical registers. + // Look for a register whose prefix could patch the range [FD..LD] + // where VR and SrcR differ. + uint16_t FD = FDi, LD = LDi; // Switch to unsigned type. + uint16_t MinL = LD-FD+1; + for (uint16_t L = MinL; L < W; ++L) { + LRSMapType::iterator F = LM.find(L); + if (F == LM.end()) + continue; + RSListType &LL = F->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) { + uint16_t S = LL[i].second; + // MinL is the minimum length of the prefix. Any length above MinL + // allows some flexibility as to where the prefix can start: + // given the extra length EL=L-MinL, the prefix must start between + // max(0,FD-EL) and FD. + if (S > FD) // Starts too late. + continue; + uint16_t EL = L-MinL; + uint16_t LowS = (EL < FD) ? FD-EL : 0; + if (S < LowS) // Starts too early. + continue; + unsigned InsR = LL[i].first; + if (!isValidInsertForm(VR, SrcR, InsR, L, S)) + continue; + if (isDebug()) { + dbgs() << PrintReg(VR, HRI) << " = insert(" << PrintReg(SrcR, HRI) + << ',' << PrintReg(InsR, HRI) << ",#" << L << ",#" + << S << ")\n"; + } + IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet()); + IFMap[VR].push_back(RR); + Recorded = true; + } + } + } + + return Recorded; +} + + +void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, + OrderedRegisterList &AVs) { + if (isDebug()) + dbgs() << "visiting block BB#" << B->getNumber() << "\n"; + + // First, check if this block is reachable at all. If not, the bit tracker + // will not have any information about registers in it. + if (!CMS->BT.reached(B)) + return; + + bool DoConst = OptConst; + // Keep a separate set of registers defined in this block, so that we + // can remove them from the list of available registers once all DT + // successors have been processed. + RegisterSet BlockDefs, InsDefs; + for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { + MachineInstr *MI = &*I; + InsDefs.clear(); + getInstrDefs(MI, InsDefs); + // Leave those alone. They are more transparent than "insert". + bool Skip = MI->isCopy() || MI->isRegSequence(); + + if (!Skip) { + // Visit all defined registers, and attempt to find the corresponding + // "insert" representations. + for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) { + // Do not collect registers that are known to be compile-time cons- + // tants, unless requested. + if (!DoConst && isConstant(VR)) + continue; + // If VR's cell contains a reference to VR, then VR cannot be defined + // via "insert". If VR is a constant that can be generated in a single + // instruction (without constant extenders), generating it via insert + // makes no sense. 
+ if (findSelfReference(VR) || isSmallConstant(VR)) + continue; + + findRecordInsertForms(VR, AVs); + } + } + + // Insert the defined registers into the list of available registers + // after they have been processed. + for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) + AVs.insert(VR); + BlockDefs.insert(InsDefs); + } + + MachineDomTreeNode *N = MDT->getNode(B); + typedef GraphTraits<MachineDomTreeNode*> GTN; + typedef GTN::ChildIteratorType ChildIter; + for (ChildIter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { + MachineBasicBlock *SB = (*I)->getBlock(); + collectInBlock(SB, AVs); + } + + for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR)) + AVs.remove(VR); +} + + +void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, + RegisterSet &RMs) const { + // For a given register VR and a insert form, find the registers that are + // used by the current definition of VR, and which would no longer be + // needed for it after the definition of VR is replaced with the insert + // form. These are the registers that could potentially become dead. + RegisterSet Regs[2]; + + unsigned S = 0; // Register set selector. + Regs[S].insert(VR); + + while (!Regs[S].empty()) { + // Breadth-first search. + unsigned OtherS = 1-S; + Regs[OtherS].clear(); + for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) { + Regs[S].remove(R); + if (R == IF.SrcR || R == IF.InsR) + continue; + // Check if a given register has bits that are references to any other + // registers. This is to detect situations where the instruction that + // defines register R takes register Q as an operand, but R itself does + // not contain any bits from Q. Loads are examples of how this could + // happen: + // R = load Q + // In this case (assuming we do not have any knowledge about the loaded + // value), we must not treat R as a "conveyance" of the bits from Q. + // (The information in BT about R's bits would have them as constants, + // in case of zero-extending loads, or refs to R.) + if (!findNonSelfReference(R)) + continue; + RMs.insert(R); + const MachineInstr *DefI = MRI->getVRegDef(R); + assert(DefI); + // Do not iterate past PHI nodes to avoid infinite loops. This can + // make the final set a bit less accurate, but the removable register + // sets are an approximation anyway. + if (DefI->isPHI()) + continue; + getInstrUses(DefI, Regs[OtherS]); + } + S = OtherS; + } + // The register VR is added to the list as a side-effect of the algorithm, + // but it is not "potentially removable". A potentially removable register + // is one that may become unused (dead) after conversion to the insert form + // IF, and obviously VR (or its replacement) will not become dead by apply- + // ing IF. + RMs.remove(VR); +} + + +void HexagonGenInsert::computeRemovableRegisters() { + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + IFListType &LL = I->second; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + findRemovableRegisters(I->first, LL[i].first, LL[i].second); + } +} + + +void HexagonGenInsert::pruneEmptyLists() { + // Remove all entries from the map, where the register has no insert forms + // associated with it. 
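+  // The iterators to erase are collected first and only erased afterwards;
+  // erasing from a DenseMap does not invalidate iterators to the remaining
+  // entries, so the collected iterators stay valid.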
+ typedef SmallVector<IFMapType::iterator,16> IterListType; + IterListType Prune; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + if (I->second.size() == 0) + Prune.push_back(I); + } + for (unsigned i = 0, n = Prune.size(); i < n; ++i) + IFMap.erase(Prune[i]); +} + + +void HexagonGenInsert::pruneCoveredSets(unsigned VR) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + + // First, examine the IF candidates for register VR whose removable-regis- + // ter sets are empty. This means that a given candidate will not help eli- + // minate any registers, but since "insert" is not a constant-extendable + // instruction, using such a candidate may reduce code size if the defini- + // tion of VR is constant-extended. + // If there exists a candidate with a non-empty set, the ones with empty + // sets will not be used and can be removed. + MachineInstr *DefVR = MRI->getVRegDef(VR); + bool DefEx = HII->isConstExtended(DefVR); + bool HasNE = false; + for (unsigned i = 0, n = LL.size(); i < n; ++i) { + if (LL[i].second.empty()) + continue; + HasNE = true; + break; + } + if (!DefEx || HasNE) { + // The definition of VR is not constant-extended, or there is a candidate + // with a non-empty set. Remove all candidates with empty sets. + auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool { + return IR.second.empty(); + }; + auto End = std::remove_if(LL.begin(), LL.end(), IsEmpty); + if (End != LL.end()) + LL.erase(End, LL.end()); + } else { + // The definition of VR is constant-extended, and all candidates have + // empty removable-register sets. Pick the maximum candidate, and remove + // all others. The "maximum" does not have any special meaning here, it + // is only so that the candidate that will remain on the list is selec- + // ted deterministically. + IFRecord MaxIF = LL[0].first; + for (unsigned i = 1, n = LL.size(); i < n; ++i) { + // If LL[MaxI] < LL[i], then MaxI = i. + const IFRecord &IF = LL[i].first; + unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR]; + unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR]; + if (M0 > R0) + continue; + if (M0 == R0) { + if (M1 > R1) + continue; + if (M1 == R1) { + if (MaxIF.Wdh > IF.Wdh) + continue; + if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off) + continue; + } + } + // MaxIF < IF. + MaxIF = IF; + } + // Remove everything except the maximum candidate. All register sets + // are empty, so no need to preserve anything. + LL.clear(); + LL.push_back(std::make_pair(MaxIF, RegisterSet())); + } + + // Now, remove those whose sets of potentially removable registers are + // contained in another IF candidate for VR. For example, given these + // candidates for vreg45, + // %vreg45: + // (%vreg44,%vreg41,#9,#8), { %vreg42 } + // (%vreg43,%vreg41,#9,#8), { %vreg42 %vreg44 } + // remove the first one, since it is contained in the second one. + for (unsigned i = 0, n = LL.size(); i < n; ) { + const RegisterSet &RMi = LL[i].second; + unsigned j = 0; + while (j < n) { + if (j != i && LL[j].second.includes(RMi)) + break; + j++; + } + if (j == n) { // RMi not contained in anything else. 
+ i++; + continue; + } + LL.erase(LL.begin()+i); + n = LL.size(); + } +} + + +void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, + PairMapType &M) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + unsigned Cutoff = VRegDistCutoff; + const MachineInstr *DefV = MRI->getVRegDef(VR); + + for (unsigned i = LL.size(); i > 0; --i) { + unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR; + const MachineInstr *DefS = MRI->getVRegDef(SR); + const MachineInstr *DefI = MRI->getVRegDef(IR); + unsigned DSV = distance(DefS, DefV, RPO, M); + if (DSV < Cutoff) { + unsigned DIV = distance(DefI, DefV, RPO, M); + if (DIV < Cutoff) + continue; + } + LL.erase(LL.begin()+(i-1)); + } +} + + +void HexagonGenInsert::pruneRegCopies(unsigned VR) { + IFMapType::iterator F = IFMap.find(VR); + assert(F != IFMap.end()); + IFListType &LL = F->second; + + auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool { + return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32); + }; + auto End = std::remove_if(LL.begin(), LL.end(), IsCopy); + if (End != LL.end()) + LL.erase(End, LL.end()); +} + + +void HexagonGenInsert::pruneCandidates() { + // Remove candidates that are not beneficial, regardless of the final + // selection method. + // First, remove candidates whose potentially removable set is a subset + // of another candidate's set. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneCoveredSets(I->first); + + UnsignedMap RPO; + typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType; + RPOTType RPOT(MFN); + unsigned RPON = 0; + for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) + RPO[(*I)->getNumber()] = RPON++; + + PairMapType Memo; // Memoization map for distance calculation. + // Remove candidates that would use registers defined too far away. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneUsesTooFar(I->first, RPO, Memo); + + pruneEmptyLists(); + + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) + pruneRegCopies(I->first); +} + + +namespace { + // Class for comparing IF candidates for registers that have multiple of + // them. The smaller the candidate, according to this ordering, the better. + // First, compare the number of zeros in the associated potentially remova- + // ble register sets. "Zero" indicates that the register is very likely to + // become dead after this transformation. + // Second, compare "averages", i.e. use-count per size. The lower wins. + // After that, it does not really matter which one is smaller. Resolve + // the tie in some deterministic way. + struct IFOrdering { + IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO) + : UseC(UC), BaseOrd(BO) {} + bool operator() (const IFRecordWithRegSet &A, + const IFRecordWithRegSet &B) const; + private: + void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, + unsigned &Sum) const; + const UnsignedMap &UseC; + const RegisterOrdering &BaseOrd; + }; +} + + +bool IFOrdering::operator() (const IFRecordWithRegSet &A, + const IFRecordWithRegSet &B) const { + unsigned SizeA = 0, ZeroA = 0, SumA = 0; + unsigned SizeB = 0, ZeroB = 0, SumB = 0; + stats(A.second, SizeA, ZeroA, SumA); + stats(B.second, SizeB, ZeroB, SumB); + + // We will pick the minimum element. The more zeros, the better. + if (ZeroA != ZeroB) + return ZeroA > ZeroB; + // Compare SumA/SizeA with SumB/SizeB, lower is better. 
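+  // Cross-multiplying keeps the comparison exact and avoids division: for
+  // positive sizes, SumA/SizeA < SumB/SizeB holds exactly when
+  // SumA*SizeB < SumB*SizeA (e.g. 3/2 vs 5/4 compares as 12 vs 10), and an
+  // empty set (size 0, sum 0) simply makes both products zero.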
+ uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA; + if (AvgA != AvgB) + return AvgA < AvgB; + + // The sets compare identical so far. Resort to comparing the IF records. + // The actual values don't matter, this is only for determinism. + unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR]; + if (OSA != OSB) + return OSA < OSB; + unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR]; + if (OIA != OIB) + return OIA < OIB; + if (A.first.Wdh != B.first.Wdh) + return A.first.Wdh < B.first.Wdh; + return A.first.Off < B.first.Off; +} + + +void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, + unsigned &Sum) const { + for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) { + UnsignedMap::const_iterator F = UseC.find(R); + assert(F != UseC.end()); + unsigned UC = F->second; + if (UC == 0) + Zero++; + Sum += UC; + Size++; + } +} + + +void HexagonGenInsert::selectCandidates() { + // Some registers may have multiple valid candidates. Pick the best one + // (or decide not to use any). + + // Compute the "removability" measure of R: + // For each potentially removable register R, record the number of regis- + // ters with IF candidates, where R appears in at least one set. + RegisterSet AllRMs; + UnsignedMap UseC, RemC; + IFMapType::iterator End = IFMap.end(); + + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + const IFListType &LL = I->second; + RegisterSet TT; + for (unsigned i = 0, n = LL.size(); i < n; ++i) + TT.insert(LL[i].second); + for (unsigned R = TT.find_first(); R; R = TT.find_next(R)) + RemC[R]++; + AllRMs.insert(TT); + } + + for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) { + typedef MachineRegisterInfo::use_nodbg_iterator use_iterator; + typedef SmallSet<const MachineInstr*,16> InstrSet; + InstrSet UIs; + // Count as the number of instructions in which R is used, not the + // number of operands. + use_iterator E = MRI->use_nodbg_end(); + for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I) + UIs.insert(I->getParent()); + unsigned C = UIs.size(); + // Calculate a measure, which is the number of instructions using R, + // minus the "removability" count computed earlier. + unsigned D = RemC[R]; + UseC[R] = (C > D) ? C-D : 0; // doz + } + + + bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0; + if (!SelectAll0 && !SelectHas0) + SelectAll0 = true; + + // The smaller the number UseC for a given register R, the "less used" + // R is aside from the opportunities for removal offered by generating + // "insert" instructions. + // Iterate over the IF map, and for those registers that have multiple + // candidates, pick the minimum one according to IFOrdering. + IFOrdering IFO(UseC, BaseOrd); + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + IFListType &LL = I->second; + if (LL.empty()) + continue; + // Get the minimum element, remember it and clear the list. If the + // element found is adequate, we will put it back on the list, other- + // wise the list will remain empty, and the entry for this register + // will be removed (i.e. this register will not be replaced by insert). + IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO); + assert(MinI != LL.end()); + IFRecordWithRegSet M = *MinI; + LL.clear(); + + // We want to make sure that this replacement will have a chance to be + // beneficial, and that means that we want to have indication that some + // register will be removed. 
The most likely registers to be eliminated + // are the use operands in the definition of I->first. Accept/reject a + // candidate based on how many of its uses it can potentially eliminate. + + RegisterSet Us; + const MachineInstr *DefI = MRI->getVRegDef(I->first); + getInstrUses(DefI, Us); + bool Accept = false; + + if (SelectAll0) { + bool All0 = true; + for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { + if (UseC[R] == 0) + continue; + All0 = false; + break; + } + Accept = All0; + } else if (SelectHas0) { + bool Has0 = false; + for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { + if (UseC[R] != 0) + continue; + Has0 = true; + break; + } + Accept = Has0; + } + if (Accept) + LL.push_back(M); + } + + // Remove candidates that add uses of removable registers, unless the + // removable registers are among replacement candidates. + // Recompute the removable registers, since some candidates may have + // been eliminated. + AllRMs.clear(); + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + const IFListType &LL = I->second; + if (LL.size() > 0) + AllRMs.insert(LL[0].second); + } + for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { + IFListType &LL = I->second; + if (LL.size() == 0) + continue; + unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR; + if (AllRMs[SR] || AllRMs[IR]) + LL.clear(); + } + + pruneEmptyLists(); +} + + +bool HexagonGenInsert::generateInserts() { + // Create a new register for each one from IFMap, and store them in the + // map. + UnsignedMap RegMap; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + unsigned VR = I->first; + const TargetRegisterClass *RC = MRI->getRegClass(VR); + unsigned NewVR = MRI->createVirtualRegister(RC); + RegMap[VR] = NewVR; + } + + // We can generate the "insert" instructions using potentially stale re- + // gisters: SrcR and InsR for a given VR may be among other registers that + // are also replaced. This is fine, we will do the mass "rauw" a bit later. + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + MachineInstr *MI = MRI->getVRegDef(I->first); + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned NewR = RegMap[I->first]; + bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass; + const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert) + : HII->get(Hexagon::S2_insertp); + IFRecord IF = I->second[0].first; + unsigned Wdh = IF.Wdh, Off = IF.Off; + unsigned InsS = 0; + if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { + InsS = Hexagon::subreg_loreg; + if (Off >= 32) { + InsS = Hexagon::subreg_hireg; + Off -= 32; + } + } + // Advance to the proper location for inserting instructions. This could + // be B.end(). 
+ MachineBasicBlock::iterator At = MI; + if (MI->isPHI()) + At = B.getFirstNonPHI(); + + BuildMI(B, At, DL, D, NewR) + .addReg(IF.SrcR) + .addReg(IF.InsR, 0, InsS) + .addImm(Wdh) + .addImm(Off); + + MRI->clearKillFlags(IF.SrcR); + MRI->clearKillFlags(IF.InsR); + } + + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + MachineInstr *DefI = MRI->getVRegDef(I->first); + MRI->replaceRegWith(I->first, RegMap[I->first]); + DefI->eraseFromParent(); + } + + return true; +} + + +bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { + bool Changed = false; + typedef GraphTraits<MachineDomTreeNode*> GTN; + for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) + Changed |= removeDeadCode(*I); + + MachineBasicBlock *B = N->getBlock(); + std::vector<MachineInstr*> Instrs; + for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) + Instrs.push_back(&*I); + + for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { + MachineInstr *MI = *I; + unsigned Opc = MI->getOpcode(); + // Do not touch lifetime markers. This is why the target-independent DCE + // cannot be used. + if (Opc == TargetOpcode::LIFETIME_START || + Opc == TargetOpcode::LIFETIME_END) + continue; + bool Store = false; + if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store)) + continue; + + bool AllDead = true; + SmallVector<unsigned,2> Regs; + for (ConstMIOperands Op(MI); Op.isValid(); ++Op) { + if (!Op->isReg() || !Op->isDef()) + continue; + unsigned R = Op->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R) || + !MRI->use_nodbg_empty(R)) { + AllDead = false; + break; + } + Regs.push_back(R); + } + if (!AllDead) + continue; + + B->erase(MI); + for (unsigned I = 0, N = Regs.size(); I != N; ++I) + MRI->markUsesInDebugValueAsUndef(Regs[I]); + Changed = true; + } + + return Changed; +} + + +bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { + bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; + bool Changed = false; + TimerGroup __G("hexinsert"); + NamedRegionTimer __T("hexinsert", Timing && !TimingDetail); + + // Sanity check: one, but not both. + assert(!OptSelectAll0 || !OptSelectHas0); + + IFMap.clear(); + BaseOrd.clear(); + CellOrd.clear(); + + const auto &ST = MF.getSubtarget<HexagonSubtarget>(); + HII = ST.getInstrInfo(); + HRI = ST.getRegisterInfo(); + MFN = &MF; + MRI = &MF.getRegInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + + // Clean up before any further processing, so that dead code does not + // get used in a newly generated "insert" instruction. Have a custom + // version of DCE that preserves lifetime markers. Without it, merging + // of stack objects can fail to recognize and merge disjoint objects + // leading to unnecessary stack growth. + Changed |= removeDeadCode(MDT->getRootNode()); + + const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); + BitTracker BTLoc(HE, MF); + BTLoc.trace(isDebug()); + BTLoc.run(); + CellMapShadow MS(BTLoc); + CMS = &MS; + + buildOrderingMF(BaseOrd); + buildOrderingBT(BaseOrd, CellOrd); + + if (isDebug()) { + dbgs() << "Cell ordering:\n"; + for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end(); + I != E; ++I) { + unsigned VR = I->first, Pos = I->second; + dbgs() << PrintReg(VR, HRI) << " -> " << Pos << "\n"; + } + } + + // Collect candidates for conversion into the insert forms. 
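+  // The collection is a depth-first walk over the dominator tree, so when a
+  // block is visited, the available-register list AvailR only contains
+  // registers whose definitions dominate the current point, which is what
+  // allows them to appear as SrcR/InsR in an insert form.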
+ MachineBasicBlock *RootB = MDT->getRoot(); + OrderedRegisterList AvailR(CellOrd); + + { + NamedRegionTimer _T("collection", "hexinsert", TimingDetail); + collectInBlock(RootB, AvailR); + // Complete the information gathered in IFMap. + computeRemovableRegisters(); + } + + if (isDebug()) { + dbgs() << "Candidates after collection:\n"; + dump_map(); + } + + if (IFMap.empty()) + return false; + + { + NamedRegionTimer _T("pruning", "hexinsert", TimingDetail); + pruneCandidates(); + } + + if (isDebug()) { + dbgs() << "Candidates after pruning:\n"; + dump_map(); + } + + if (IFMap.empty()) + return false; + + { + NamedRegionTimer _T("selection", "hexinsert", TimingDetail); + selectCandidates(); + } + + if (isDebug()) { + dbgs() << "Candidates after selection:\n"; + dump_map(); + } + + // Filter out vregs beyond the cutoff. + if (VRegIndexCutoff.getPosition()) { + unsigned Cutoff = VRegIndexCutoff; + typedef SmallVector<IFMapType::iterator,16> IterListType; + IterListType Out; + for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { + unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first); + if (Idx >= Cutoff) + Out.push_back(I); + } + for (unsigned i = 0, n = Out.size(); i < n; ++i) + IFMap.erase(Out[i]); + } + + { + NamedRegionTimer _T("generation", "hexinsert", TimingDetail); + Changed = generateInserts(); + } + + return Changed; +} + + +FunctionPass *llvm::createHexagonGenInsert() { + return new HexagonGenInsert(); +} + + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", + "Hexagon generate \"insert\" instructions", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert", + "Hexagon generate \"insert\" instructions", false, false) diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp new file mode 100644 index 000000000000..6905c4f6d125 --- /dev/null +++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -0,0 +1,525 @@ +//===--- HexagonGenPredicate.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "gen-pred" + +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "HexagonTargetMachine.h" + +#include <functional> +#include <queue> +#include <set> +#include <vector> + +using namespace llvm; + +namespace llvm { + void initializeHexagonGenPredicatePass(PassRegistry& Registry); + FunctionPass *createHexagonGenPredicate(); +} + +namespace { + struct Register { + unsigned R, S; + Register(unsigned r = 0, unsigned s = 0) : R(r), S(s) {} + Register(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {} + bool operator== (const Register &Reg) const { + return R == Reg.R && S == Reg.S; + } + bool operator< (const Register &Reg) const { + return R < Reg.R || (R == Reg.R && S < Reg.S); + } + }; + struct PrintRegister { + PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {} + friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR); + private: + Register Reg; + const TargetRegisterInfo &TRI; + }; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) + LLVM_ATTRIBUTE_UNUSED; + raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) { + return OS << PrintReg(PR.Reg.R, &PR.TRI, PR.Reg.S); + } + + class HexagonGenPredicate : public MachineFunctionPass { + public: + static char ID; + HexagonGenPredicate() : MachineFunctionPass(ID), TII(0), TRI(0), MRI(0) { + initializeHexagonGenPredicatePass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "Hexagon generate predicate operations"; + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + virtual bool runOnMachineFunction(MachineFunction &MF); + + private: + typedef SetVector<MachineInstr*> VectOfInst; + typedef std::set<Register> SetOfReg; + typedef std::map<Register,Register> RegToRegMap; + + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + MachineRegisterInfo *MRI; + SetOfReg PredGPRs; + VectOfInst PUsers; + RegToRegMap G2P; + + bool isPredReg(unsigned R); + void collectPredicateGPR(MachineFunction &MF); + void processPredicateGPR(const Register &Reg); + unsigned getPredForm(unsigned Opc); + bool isConvertibleToPredForm(const MachineInstr *MI); + bool isScalarCmp(unsigned Opc); + bool isScalarPred(Register PredReg); + Register getPredRegFor(const Register &Reg); + bool convertToPredForm(MachineInstr *MI); + bool eliminatePredCopies(MachineFunction &MF); + }; + + char HexagonGenPredicate::ID = 0; +} + +INITIALIZE_PASS_BEGIN(HexagonGenPredicate, "hexagon-gen-pred", + "Hexagon generate predicate operations", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonGenPredicate, "hexagon-gen-pred", + "Hexagon generate predicate operations", false, false) + +bool HexagonGenPredicate::isPredReg(unsigned R) { + if (!TargetRegisterInfo::isVirtualRegister(R)) + return false; + const TargetRegisterClass *RC = 
MRI->getRegClass(R); + return RC == &Hexagon::PredRegsRegClass; +} + + +unsigned HexagonGenPredicate::getPredForm(unsigned Opc) { + using namespace Hexagon; + + switch (Opc) { + case A2_and: + case A2_andp: + return C2_and; + case A4_andn: + case A4_andnp: + return C2_andn; + case M4_and_and: + return C4_and_and; + case M4_and_andn: + return C4_and_andn; + case M4_and_or: + return C4_and_or; + + case A2_or: + case A2_orp: + return C2_or; + case A4_orn: + case A4_ornp: + return C2_orn; + case M4_or_and: + return C4_or_and; + case M4_or_andn: + return C4_or_andn; + case M4_or_or: + return C4_or_or; + + case A2_xor: + case A2_xorp: + return C2_xor; + + case C2_tfrrp: + return COPY; + } + // The opcode corresponding to 0 is TargetOpcode::PHI. We can use 0 here + // to denote "none", but we need to make sure that none of the valid opcodes + // that we return will ever be 0. + assert(PHI == 0 && "Use different value for <none>"); + return 0; +} + + +bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + if (getPredForm(Opc) != 0) + return true; + + // Comparisons against 0 are also convertible. This does not apply to + // A4_rcmpeqi or A4_rcmpneqi, since they produce values 0 or 1, which + // may not match the value that the predicate register would have if + // it was converted to a predicate form. + switch (Opc) { + case Hexagon::C2_cmpeqi: + case Hexagon::C4_cmpneqi: + if (MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) + return true; + break; + } + return false; +} + + +void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) { + for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { + MachineBasicBlock &B = *A; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::C2_tfrpr: + case TargetOpcode::COPY: + if (isPredReg(MI->getOperand(1).getReg())) { + Register RD = MI->getOperand(0); + if (TargetRegisterInfo::isVirtualRegister(RD.R)) + PredGPRs.insert(RD); + } + break; + } + } + } +} + + +void HexagonGenPredicate::processPredicateGPR(const Register &Reg) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " + << PrintReg(Reg.R, TRI, Reg.S) << "\n"); + typedef MachineRegisterInfo::use_iterator use_iterator; + use_iterator I = MRI->use_begin(Reg.R), E = MRI->use_end(); + if (I == E) { + DEBUG(dbgs() << "Dead reg: " << PrintReg(Reg.R, TRI, Reg.S) << '\n'); + MachineInstr *DefI = MRI->getVRegDef(Reg.R); + DefI->eraseFromParent(); + return; + } + + for (; I != E; ++I) { + MachineInstr *UseI = I->getParent(); + if (isConvertibleToPredForm(UseI)) + PUsers.insert(UseI); + } +} + + +Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { + // Create a predicate register for a given Reg. The newly created register + // will have its value copied from Reg, so that it can be later used as + // an operand in other instructions. 
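+  // If Reg is defined by a transfer from a predicate register (C2_tfrpr or
+  // a plain COPY), reuse that predicate register directly. Otherwise, if the
+  // defining instruction is convertible to a predicate form, create a new
+  // predicate virtual register and insert a COPY from Reg right after the
+  // definition.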
+ assert(TargetRegisterInfo::isVirtualRegister(Reg.R)); + RegToRegMap::iterator F = G2P.find(Reg); + if (F != G2P.end()) + return F->second; + + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << PrintRegister(Reg, *TRI)); + MachineInstr *DefI = MRI->getVRegDef(Reg.R); + assert(DefI); + unsigned Opc = DefI->getOpcode(); + if (Opc == Hexagon::C2_tfrpr || Opc == TargetOpcode::COPY) { + assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse()); + Register PR = DefI->getOperand(1); + G2P.insert(std::make_pair(Reg, PR)); + DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n'); + return PR; + } + + MachineBasicBlock &B = *DefI->getParent(); + DebugLoc DL = DefI->getDebugLoc(); + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + unsigned NewPR = MRI->createVirtualRegister(PredRC); + + // For convertible instructions, do not modify them, so that they can + // be coverted later. Generate a copy from Reg to NewPR. + if (isConvertibleToPredForm(DefI)) { + MachineBasicBlock::iterator DefIt = DefI; + BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR) + .addReg(Reg.R, 0, Reg.S); + G2P.insert(std::make_pair(Reg, Register(NewPR))); + DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI) << '\n'); + return Register(NewPR); + } + + llvm_unreachable("Invalid argument"); +} + + +bool HexagonGenPredicate::isScalarCmp(unsigned Opc) { + switch (Opc) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpeqp: + case Hexagon::C2_cmpgtp: + case Hexagon::C2_cmpgtup: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgei: + case Hexagon::C2_cmpgeui: + case Hexagon::C4_cmpneqi: + case Hexagon::C4_cmpltei: + case Hexagon::C4_cmplteui: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + case Hexagon::A4_cmpbeq: + case Hexagon::A4_cmpbeqi: + case Hexagon::A4_cmpbgtu: + case Hexagon::A4_cmpbgtui: + case Hexagon::A4_cmpbgt: + case Hexagon::A4_cmpbgti: + case Hexagon::A4_cmpheq: + case Hexagon::A4_cmphgt: + case Hexagon::A4_cmphgtu: + case Hexagon::A4_cmpheqi: + case Hexagon::A4_cmphgti: + case Hexagon::A4_cmphgtui: + return true; + } + return false; +} + + +bool HexagonGenPredicate::isScalarPred(Register PredReg) { + std::queue<Register> WorkQ; + WorkQ.push(PredReg); + + while (!WorkQ.empty()) { + Register PR = WorkQ.front(); + WorkQ.pop(); + const MachineInstr *DefI = MRI->getVRegDef(PR.R); + if (!DefI) + return false; + unsigned DefOpc = DefI->getOpcode(); + switch (DefOpc) { + case TargetOpcode::COPY: { + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + if (MRI->getRegClass(PR.R) != PredRC) + return false; + // If it is a copy between two predicate registers, fall through. + } + case Hexagon::C2_and: + case Hexagon::C2_andn: + case Hexagon::C4_and_and: + case Hexagon::C4_and_andn: + case Hexagon::C4_and_or: + case Hexagon::C2_or: + case Hexagon::C2_orn: + case Hexagon::C4_or_and: + case Hexagon::C4_or_andn: + case Hexagon::C4_or_or: + case Hexagon::C4_or_orn: + case Hexagon::C2_xor: + // Add operands to the queue. + for (ConstMIOperands Mo(DefI); Mo.isValid(); ++Mo) + if (Mo->isReg() && Mo->isUse()) + WorkQ.push(Register(Mo->getReg())); + break; + + // All non-vector compares are ok, everything else is bad. 
+ default: + return isScalarCmp(DefOpc); + } + } + + return true; +} + + +bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << MI << " " << *MI); + + unsigned Opc = MI->getOpcode(); + assert(isConvertibleToPredForm(MI)); + unsigned NumOps = MI->getNumOperands(); + for (unsigned i = 0; i < NumOps; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + Register Reg(MO); + if (Reg.S && Reg.S != Hexagon::subreg_loreg) + return false; + if (!PredGPRs.count(Reg)) + return false; + } + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + unsigned NewOpc = getPredForm(Opc); + // Special case for comparisons against 0. + if (NewOpc == 0) { + switch (Opc) { + case Hexagon::C2_cmpeqi: + NewOpc = Hexagon::C2_not; + break; + case Hexagon::C4_cmpneqi: + NewOpc = TargetOpcode::COPY; + break; + default: + return false; + } + + // If it's a scalar predicate register, then all bits in it are + // the same. Otherwise, to determine whether all bits are 0 or not + // we would need to use any8. + Register PR = getPredRegFor(MI->getOperand(1)); + if (!isScalarPred(PR)) + return false; + // This will skip the immediate argument when creating the predicate + // version instruction. + NumOps = 2; + } + + // Some sanity: check that def is in operand #0. + MachineOperand &Op0 = MI->getOperand(0); + assert(Op0.isDef()); + Register OutR(Op0); + + // Don't use getPredRegFor, since it will create an association between + // the argument and a created predicate register (i.e. it will insert a + // copy if a new predicate register is created). + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + Register NewPR = MRI->createVirtualRegister(PredRC); + MachineInstrBuilder MIB = BuildMI(B, MI, DL, TII->get(NewOpc), NewPR.R); + + // Add predicate counterparts of the GPRs. + for (unsigned i = 1; i < NumOps; ++i) { + Register GPR = MI->getOperand(i); + Register Pred = getPredRegFor(GPR); + MIB.addReg(Pred.R, 0, Pred.S); + } + DEBUG(dbgs() << "generated: " << *MIB); + + // Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR + // with NewGPR. + const TargetRegisterClass *RC = MRI->getRegClass(OutR.R); + unsigned NewOutR = MRI->createVirtualRegister(RC); + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), NewOutR) + .addReg(NewPR.R, 0, NewPR.S); + MRI->replaceRegWith(OutR.R, NewOutR); + MI->eraseFromParent(); + + // If the processed instruction was C2_tfrrp (i.e. Rn = Pm; Pk = Rn), + // then the output will be a predicate register. Do not visit the + // users of it. + if (!isPredReg(NewOutR)) { + Register R(NewOutR); + PredGPRs.insert(R); + processPredicateGPR(R); + } + return true; +} + + +bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { + DEBUG(dbgs() << LLVM_FUNCTION_NAME << "\n"); + const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; + bool Changed = false; + VectOfInst Erase; + + // First, replace copies + // IntR = PredR1 + // PredR2 = IntR + // with + // PredR2 = PredR1 + // Such sequences can be generated when a copy-into-pred is generated from + // a gpr register holding a result of a convertible instruction. After + // the convertible instruction is converted, its predicate result will be + // copied back into the original gpr. 
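+  // Only copies between two virtual registers of the predicate register
+  // class (and without subregisters) are rewritten: the destination is
+  // replaced with the source everywhere, and the now-redundant copy is
+  // erased afterwards.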
+ + for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) { + MachineBasicBlock &B = *A; + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + if (I->getOpcode() != TargetOpcode::COPY) + continue; + Register DR = I->getOperand(0); + Register SR = I->getOperand(1); + if (!TargetRegisterInfo::isVirtualRegister(DR.R)) + continue; + if (!TargetRegisterInfo::isVirtualRegister(SR.R)) + continue; + if (MRI->getRegClass(DR.R) != PredRC) + continue; + if (MRI->getRegClass(SR.R) != PredRC) + continue; + assert(!DR.S && !SR.S && "Unexpected subregister"); + MRI->replaceRegWith(DR.R, SR.R); + Erase.insert(I); + Changed = true; + } + } + + for (VectOfInst::iterator I = Erase.begin(), E = Erase.end(); I != E; ++I) + (*I)->eraseFromParent(); + + return Changed; +} + + +bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + TRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + MRI = &MF.getRegInfo(); + PredGPRs.clear(); + PUsers.clear(); + G2P.clear(); + + bool Changed = false; + collectPredicateGPR(MF); + for (SetOfReg::iterator I = PredGPRs.begin(), E = PredGPRs.end(); I != E; ++I) + processPredicateGPR(*I); + + bool Again; + do { + Again = false; + VectOfInst Processed, Copy; + + typedef VectOfInst::iterator iterator; + Copy = PUsers; + for (iterator I = Copy.begin(), E = Copy.end(); I != E; ++I) { + MachineInstr *MI = *I; + bool Done = convertToPredForm(MI); + if (Done) { + Processed.insert(MI); + Again = true; + } + } + Changed |= Again; + + auto Done = [Processed] (MachineInstr *MI) -> bool { + return Processed.count(MI); + }; + PUsers.remove_if(Done); + } while (Again); + + Changed |= eliminatePredCopies(MF); + return Changed; +} + + +FunctionPass *llvm::createHexagonGenPredicate() { + return new HexagonGenPredicate(); +} + diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 6e9e69f5a2c7..c739afb70c15 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -459,6 +459,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); MachineFunction &MF = DAG.getMachineFunction(); + auto PtrVT = getPointerTy(MF.getDataLayout()); // Check for varargs. int NumNamedVarArgParams = -1; @@ -515,8 +516,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<SDValue, 8> MemOpChains; auto &HRI = *Subtarget.getRegisterInfo(); - SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), - getPointerTy()); + SDValue StackPtr = + DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT); // Walk the register/memloc assignments, inserting copies/loads. 
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -574,7 +575,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); if (!isTailCall) { - SDValue C = DAG.getConstant(NumBytes, dl, getPointerTy(), true); + SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true); Chain = DAG.getCALLSEQ_START(Chain, C, dl); } @@ -615,13 +616,13 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (flag_aligned_memcpy) { const char *MemcpyName = "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; - Callee = DAG.getTargetExternalSymbol(MemcpyName, getPointerTy()); + Callee = DAG.getTargetExternalSymbol(MemcpyName, PtrVT); flag_aligned_memcpy = false; } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT); } // Returns a chain & a flag for retval copy to use. @@ -811,8 +812,8 @@ LowerBR_JT(SDValue Op, SelectionDAG &DAG) const BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock())); } - SDValue JumpTableBase = DAG.getNode(HexagonISD::JT, dl, - getPointerTy(), TargetJT); + SDValue JumpTableBase = DAG.getNode( + HexagonISD::JT, dl, getPointerTy(DAG.getDataLayout()), TargetJT); SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index, DAG.getConstant(2, dl, MVT::i32)); SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase, @@ -1231,16 +1232,17 @@ SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); SDLoc dl(Op); - Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); const HexagonTargetObjectFile *TLOF = static_cast<const HexagonTargetObjectFile *>( getTargetMachine().getObjFileLowering()); if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) { - return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result); + return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, Result); } - return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result); + return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, Result); } // Specifies that for loads and stores VT can be promoted to PromotedLdStVT. @@ -1261,7 +1263,8 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); SDValue BA_SD = DAG.getTargetBlockAddress(BA, MVT::i32); SDLoc dl(Op); - return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), BA_SD); + return DAG.getNode(HexagonISD::CONST32_GP, dl, + getPointerTy(DAG.getDataLayout()), BA_SD); } //===----------------------------------------------------------------------===// @@ -2254,6 +2257,7 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); SDLoc dl(Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); // Mark function as containing a call to EH_RETURN. 
HexagonMachineFunctionInfo *FuncInfo = @@ -2262,9 +2266,9 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { unsigned OffsetReg = Hexagon::R28; - SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), - DAG.getRegister(Hexagon::R30, getPointerTy()), - DAG.getIntPtrConstant(4, dl)); + SDValue StoreAddr = + DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT), + DAG.getIntPtrConstant(4, dl)); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset); @@ -2338,8 +2342,7 @@ HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, std::pair<unsigned, const TargetRegisterClass *> HexagonTargetLowering::getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, const std::string &Constraint, - MVT VT) const { + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': // R0-R31 @@ -2372,8 +2375,8 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// isLegalAddressingMode - Return true if the addressing mode represented by /// AM is legal for this target, for a load/store of the specified type. -bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // Allows a signed-extended 11-bit immediate field. if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) @@ -2463,3 +2466,45 @@ bool llvm::isPositiveHalfWord(SDNode *N) { return true; } } + +Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const { + BasicBlock *BB = Builder.GetInsertBlock(); + Module *M = BB->getParent()->getParent(); + Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); + unsigned SZ = Ty->getPrimitiveSizeInBits(); + assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported"); + Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked + : Intrinsic::hexagon_L4_loadd_locked; + Value *Fn = Intrinsic::getDeclaration(M, IntID); + return Builder.CreateCall(Fn, Addr, "larx"); +} + +/// Perform a store-conditional operation to Addr. Return the status of the +/// store. This should be 0 if the store succeeded, non-zero otherwise. +Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder, + Value *Val, Value *Addr, AtomicOrdering Ord) const { + BasicBlock *BB = Builder.GetInsertBlock(); + Module *M = BB->getParent()->getParent(); + Type *Ty = Val->getType(); + unsigned SZ = Ty->getPrimitiveSizeInBits(); + assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported"); + Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked + : Intrinsic::hexagon_S4_stored_locked; + Value *Fn = Intrinsic::getDeclaration(M, IntID); + Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx"); + Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), ""); + Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext())); + return Ext; +} + +bool HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + // Do not expand loads and stores that don't exceed 64 bits. + return LI->getType()->getPrimitiveSizeInBits() > 64; +} + +bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + // Do not expand loads and stores that don't exceed 64 bits. 
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64; +} + diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index b80e8477eb7b..2642abffaddd 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -165,7 +165,8 @@ bool isPositiveHalfWord(SDNode *N); SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - EVT getSetCCResultType(LLVMContext &C, EVT VT) const override { + EVT getSetCCResultType(const DataLayout &, LLVMContext &C, + EVT VT) const override { if (!VT.isVector()) return MVT::i1; else @@ -179,11 +180,10 @@ bool isPositiveHalfWord(SDNode *N); std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "o") return InlineAsm::Constraint_o; else if (ConstraintCode == "v") @@ -198,8 +198,8 @@ bool isPositiveHalfWord(SDNode *N); /// The type may be VoidTy, in which case only return true if the addressing /// mode is legal for a load/store of any legal type. /// TODO: Handle pre/postinc as well. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -207,6 +207,21 @@ bool isPositiveHalfWord(SDNode *N); /// compare a register against the immediate without having to materialize /// the immediate into a register. bool isLegalICmpImmediate(int64_t Imm) const override; + + // Handling of atomic RMW instructions. 
+ bool hasLoadLinkedStoreConditional() const override { + return true; + } + Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const override; + Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const override; + bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) + const override { + return AtomicRMWExpansionKind::LLSC; + } }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 8f255a08f534..f6bb4a045438 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -221,7 +221,7 @@ unsigned HexagonRegisterInfo::getRARegister() const { unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const HexagonFrameLowering *TFI = getFrameLowering(MF); if (TFI->hasFP(MF)) return Hexagon::R30; return Hexagon::R29; @@ -240,7 +240,8 @@ unsigned HexagonRegisterInfo::getStackRegister() const { bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - return MF.getSubtarget().getFrameLowering()->hasFP(MF); + const HexagonFrameLowering *TFI = getFrameLowering(MF); + return TFI->hasFP(MF); } diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index b5db997eb1b8..276cc69eed0f 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -18,12 +18,6 @@ using namespace llvm; bool llvm::flag_aligned_memcpy; -HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() { -} - SDValue HexagonSelectionDAGInfo:: EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h index 8ac2e43f9294..80ac5d7bd9e2 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -20,8 +20,6 @@ namespace llvm { class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit HexagonSelectionDAGInfo(const DataLayout &DL); - ~HexagonSelectionDAGInfo(); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index fe6c4f4298b5..cd482b3e3af1 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -74,7 +74,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(*TM.getDataLayout()), FrameLowering() { + FrameLowering() { // Initialize scheduling itinerary for the specified CPU. 
InstrItins = getInstrItineraryForCPU(CPUString); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index a173a8087832..b50442969a29 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -37,6 +37,18 @@ static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Early expansion of MUX")); +static cl::opt<bool> EnableGenInsert("hexagon-insert", cl::init(true), + cl::Hidden, cl::desc("Generate \"insert\" instructions")); + +static cl::opt<bool> EnableCommGEP("hexagon-commgep", cl::init(true), + cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions")); + +static cl::opt<bool> EnableGenExtract("hexagon-extract", cl::init(true), + cl::Hidden, cl::desc("Generate \"extract\" instructions")); + +static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true), + cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " + "predicate instructions")); /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the @@ -60,23 +72,23 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); namespace llvm { - FunctionPass *createHexagonExpandCondsets(); - FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, - CodeGenOpt::Level OptLevel); - FunctionPass *createHexagonDelaySlotFillerPass(const TargetMachine &TM); - FunctionPass *createHexagonFPMoverPass(const TargetMachine &TM); - FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); FunctionPass *createHexagonCFGOptimizer(); - - FunctionPass *createHexagonSplitConst32AndConst64(); + FunctionPass *createHexagonCommonGEP(); + FunctionPass *createHexagonCopyToCombine(); + FunctionPass *createHexagonExpandCondsets(); FunctionPass *createHexagonExpandPredSpillCode(); - FunctionPass *createHexagonHardwareLoops(); - FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonFixupHwLoops(); + FunctionPass *createHexagonGenExtract(); + FunctionPass *createHexagonGenInsert(); + FunctionPass *createHexagonGenPredicate(); + FunctionPass *createHexagonHardwareLoops(); + FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, + CodeGenOpt::Level OptLevel); FunctionPass *createHexagonNewValueJump(); - FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonPacketizer(); - FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonPeephole(); + FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); + FunctionPass *createHexagonSplitConst32AndConst64(); } // end namespace llvm; /// HexagonTargetMachine ctor - Create an ILP32 architecture model. @@ -122,6 +134,7 @@ public: return createVLIWMachineSched(C); } + void addIRPasses() override; bool addInstSelector() override; void addPreRegAlloc() override; void addPostRegAlloc() override; @@ -134,6 +147,20 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { return new HexagonPassConfig(this, PM); } +void HexagonPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + bool NoOpt = (getOptLevel() == CodeGenOpt::None); + + addPass(createAtomicExpandPass(TM)); + if (!NoOpt) { + if (EnableCommGEP) + addPass(createHexagonCommonGEP()); + // Replace certain combinations of shifts and ands with extracts. 
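+    // This is done by the HexagonGenExtract pass (guarded by the
+    // -hexagon-extract option).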
+ if (EnableGenExtract) + addPass(createHexagonGenExtract()); + } +} + bool HexagonPassConfig::addInstSelector() { HexagonTargetMachine &TM = getHexagonTargetMachine(); bool NoOpt = (getOptLevel() == CodeGenOpt::None); @@ -144,8 +171,13 @@ bool HexagonPassConfig::addInstSelector() { addPass(createHexagonISelDag(TM, getOptLevel())); if (!NoOpt) { + // Create logical operations on predicate registers. + if (EnableGenPred) + addPass(createHexagonGenPredicate(), false); addPass(createHexagonPeephole()); printAndVerify("After hexagon peephole pass"); + if (EnableGenInsert) + addPass(createHexagonGenInsert(), false); } return false; diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt index 8259055b3f41..9d288af0214a 100644 --- a/lib/Target/Hexagon/LLVMBuild.txt +++ b/lib/Target/Hexagon/LLVMBuild.txt @@ -39,4 +39,5 @@ required_libraries = SelectionDAG Support Target + TransformUtils add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 83ce0abd835e..53305d85fd80 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -46,7 +46,7 @@ MCInstrInfo *llvm::createHexagonMCInstrInfo() { return X; } -static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitHexagonMCRegisterInfo(X, Hexagon::R0); return X; @@ -54,9 +54,7 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitHexagonMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createHexagonMCSubtargetInfoImpl(TT, CPU, FS); } namespace { @@ -151,7 +149,8 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index a8f9b52746ad..3f377631c016 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -18,7 +18,6 @@ add_llvm_target(MSP430CodeGen MSP430RegisterInfo.cpp MSP430Subtarget.cpp MSP430TargetMachine.cpp - MSP430SelectionDAGInfo.cpp MSP430AsmPrinter.cpp MSP430MCInstLower.cpp ) diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index be445c56389a..807d1129b5fc 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -37,7 +37,7 @@ static MCInstrInfo *createMSP430MCInstrInfo() { return X; } -static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createMSP430MCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitMSP430MCRegisterInfo(X, MSP430::PC); return X; @@ -45,12 +45,11 @@ static MCRegisterInfo *createMSP430MCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createMSP430MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitMSP430MCSubtargetInfo(X, TT, CPU, FS); - return X; + return 
createMSP430MCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createMSP430MCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 5ce5013d898c..8a01334ee2dd 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -254,10 +254,11 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue N, AM.Base.Reg = CurDAG->getRegister(0, VT); } - Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, - getTargetLowering()->getPointerTy()) : - AM.Base.Reg; + Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) + ? CurDAG->getTargetFrameIndex( + AM.Base.FrameIndex, + getTargetLowering()->getPointerTy(CurDAG->getDataLayout())) + : AM.Base.Reg; if (AM.GV) Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(N), diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index bc51741a836f..29bc8b33988a 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -213,7 +213,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. TargetLowering::ConstraintType -MSP430TargetLowering::getConstraintType(const std::string &Constraint) const { +MSP430TargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': @@ -227,8 +227,7 @@ MSP430TargetLowering::getConstraintType(const std::string &Constraint) const { std::pair<unsigned, const TargetRegisterClass *> MSP430TargetLowering::getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, const std::string &Constraint, - MVT VT) const { + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -494,7 +493,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, if (Flags.isByVal()) { int FI = MFI->CreateFixedObject(Flags.getByValSize(), VA.getLocMemOffset(), true); - InVal = DAG.getFrameIndex(FI, getPointerTy()); + InVal = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); } else { // Load the argument to a virtual register unsigned ObjSize = VA.getLocVT().getSizeInBits()/8; @@ -592,10 +591,10 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. 
unsigned NumBytes = CCInfo.getNextStackOffset(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, dl, - getPointerTy(), true), - dl); + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getConstant(NumBytes, dl, PtrVT, true), dl); SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass; SmallVector<SDValue, 12> MemOpChains; @@ -630,12 +629,11 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, assert(VA.isMemLoc()); if (!StackPtr.getNode()) - StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SP, getPointerTy()); + StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SP, PtrVT); - SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), - StackPtr, - DAG.getIntPtrConstant(VA.getLocMemOffset(), - dl)); + SDValue PtrOff = + DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, + DAG.getIntPtrConstant(VA.getLocMemOffset(), dl)); SDValue MemOp; ISD::ArgFlagsTy Flags = Outs[i].Flags; @@ -700,11 +698,8 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, dl, getPointerTy(), - true), - DAG.getConstant(0, dl, getPointerTy(), true), - InFlag, dl); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, dl, PtrVT, true), + DAG.getConstant(0, dl, PtrVT, true), InFlag, dl); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we @@ -788,30 +783,31 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); // Create the TargetGlobalAddress node, folding in the constant offset. 
- SDValue Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), - getPointerTy(), Offset); - return DAG.getNode(MSP430ISD::Wrapper, SDLoc(Op), - getPointerTy(), Result); + SDValue Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), PtrVT, Offset); + return DAG.getNode(MSP430ISD::Wrapper, SDLoc(Op), PtrVT, Result); } SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); - SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT); - return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result); + return DAG.getNode(MSP430ISD::Wrapper, dl, PtrVT, Result); } SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy()); + SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT); - return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result); + return DAG.getNode(MSP430ISD::Wrapper, dl, PtrVT, Result); } static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, @@ -1024,16 +1020,17 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>(); int ReturnAddrIndex = FuncInfo->getRAIndex(); + auto PtrVT = getPointerTy(MF.getDataLayout()); if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. - uint64_t SlotSize = getDataLayout()->getPointerSize(); + uint64_t SlotSize = MF.getDataLayout().getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, true); FuncInfo->setRAIndex(ReturnAddrIndex); } - return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); + return DAG.getFrameIndex(ReturnAddrIndex, PtrVT); } SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, @@ -1046,21 +1043,21 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - DAG.getConstant(getDataLayout()->getPointerSize(), dl, MVT::i16); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, dl, getPointerTy(), - FrameAddr, Offset), + DAG.getConstant(DAG.getDataLayout().getPointerSize(), dl, MVT::i16); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); } // Just load the return address. 
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, MachinePointerInfo(), false, false, false, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, + MachinePointerInfo(), false, false, false, 0); } SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, @@ -1084,10 +1081,11 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); // Frame index of first vararg argument - SDValue FrameIndex = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), - getPointerTy()); + SDValue FrameIndex = + DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); // Create a store of the frame index to the location operand @@ -1099,9 +1097,9 @@ SDValue MSP430TargetLowering::LowerVASTART(SDValue Op, SDValue MSP430TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); - SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); - return DAG.getNode(MSP430ISD::Wrapper, SDLoc(JT), - getPointerTy(), Result); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + return DAG.getNode(MSP430ISD::Wrapper, SDLoc(JT), PtrVT, Result); } /// getPostIndexedAddressParts - returns true by value, base pointer and diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 80d3ae175fb1..2d63852c185b 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -72,7 +72,9 @@ namespace llvm { explicit MSP430TargetLowering(const TargetMachine &TM, const MSP430Subtarget &STI); - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i8; + } /// LowerOperation - Provide custom lowering hooks for some operations. SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; @@ -96,11 +98,10 @@ namespace llvm { SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; TargetLowering::ConstraintType - getConstraintType(const std::string &Constraint) const override; + getConstraintType(StringRef Constraint) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// isTruncateFree - Return true if it's free to truncate a value of type /// Ty1 to type Ty2. e.g. 
On msp430 it's free to truncate a i16 value in diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 614467bcd248..2fb82e535e8d 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -37,7 +37,7 @@ MSP430RegisterInfo::MSP430RegisterInfo() const MCPhysReg* MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + const MSP430FrameLowering *TFI = getFrameLowering(*MF); const Function* F = MF->getFunction(); static const MCPhysReg CalleeSavedRegs[] = { MSP430::FP, MSP430::R5, MSP430::R6, MSP430::R7, @@ -73,7 +73,7 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const MSP430FrameLowering *TFI = getFrameLowering(MF); // Mark 4 special registers with subregisters as reserved. Reserved.set(MSP430::PCB); @@ -109,7 +109,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const MSP430FrameLowering *TFI = getFrameLowering(MF); DebugLoc dl = MI.getDebugLoc(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); @@ -156,7 +156,6 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - + const MSP430FrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? MSP430::FP : MSP430::SP; } diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp deleted file mode 100644 index 3897ef684d4d..000000000000 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the MSP430SelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#include "MSP430TargetMachine.h" -using namespace llvm; - -#define DEBUG_TYPE "msp430-selectiondag-info" - -MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() { -} diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h deleted file mode 100644 index 61a6b19111db..000000000000 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the MSP430 subclass for TargetSelectionDAGInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_MSP430_MSP430SELECTIONDAGINFO_H -#define LLVM_LIB_TARGET_MSP430_MSP430SELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class MSP430TargetMachine; - -class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit MSP430SelectionDAGInfo(const DataLayout &DL); - ~MSP430SelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp index 6374f41c00ea..6216348e4d71 100644 --- a/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -34,5 +34,4 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { MSP430Subtarget::MSP430Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) : MSP430GenSubtargetInfo(TT, CPU, FS), FrameLowering(), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(*TM.getDataLayout()) {} + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {} diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index 81f6f027d45c..ff2656d26dd2 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -18,8 +18,8 @@ #include "MSP430ISelLowering.h" #include "MSP430InstrInfo.h" #include "MSP430RegisterInfo.h" -#include "MSP430SelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -35,7 +35,7 @@ class MSP430Subtarget : public MSP430GenSubtargetInfo { MSP430FrameLowering FrameLowering; MSP430InstrInfo InstrInfo; MSP430TargetLowering TLInfo; - MSP430SelectionDAGInfo TSInfo; + TargetSelectionDAGInfo TSInfo; public: /// This constructor initializes the data members to match that @@ -60,7 +60,7 @@ public: const MSP430TargetLowering *getTargetLowering() const override { return &TLInfo; } - const MSP430SelectionDAGInfo *getSelectionDAGInfo() const override { + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } }; diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index f14156dbfa2b..5107d2ae58c3 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -1727,37 +1727,59 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, } namespace { -template <unsigned ShiftAmount> +void emitRX(unsigned Opcode, unsigned DstReg, MCOperand Imm, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createReg(DstReg)); + tmpInst.addOperand(Imm); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitRI(unsigned Opcode, unsigned DstReg, int16_t Imm, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + emitRX(Opcode, DstReg, MCOperand::createImm(Imm), IDLoc, Instructions); +} + + +void emitRRX(unsigned Opcode, unsigned DstReg, unsigned SrcReg, MCOperand Imm, + SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createReg(DstReg)); + tmpInst.addOperand(MCOperand::createReg(SrcReg)); + tmpInst.addOperand(Imm); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitRRR(unsigned Opcode, unsigned DstReg, 
unsigned SrcReg, + unsigned SrcReg2, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + emitRRX(Opcode, DstReg, SrcReg, MCOperand::createReg(SrcReg2), IDLoc, + Instructions); +} + +void emitRRI(unsigned Opcode, unsigned DstReg, unsigned SrcReg, int16_t Imm, + SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { + emitRRX(Opcode, DstReg, SrcReg, MCOperand::createImm(Imm), IDLoc, + Instructions); +} + +template <int16_t ShiftAmount> void createLShiftOri(MCOperand Operand, unsigned RegNo, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { - MCInst tmpInst; - if (ShiftAmount >= 32) { - tmpInst.setOpcode(Mips::DSLL32); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(MCOperand::createImm(ShiftAmount - 32)); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); - tmpInst.clear(); - } else if (ShiftAmount > 0) { - tmpInst.setOpcode(Mips::DSLL); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(MCOperand::createImm(ShiftAmount)); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); - tmpInst.clear(); - } + if (ShiftAmount >= 32) + emitRRI(Mips::DSLL32, RegNo, RegNo, ShiftAmount - 32, IDLoc, Instructions); + else if (ShiftAmount > 0) + emitRRI(Mips::DSLL, RegNo, RegNo, ShiftAmount, IDLoc, Instructions); + // There's no need for an ORi if the immediate is 0. if (Operand.isImm() && Operand.getImm() == 0) return; - tmpInst.setOpcode(Mips::ORi); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(MCOperand::createReg(RegNo)); - tmpInst.addOperand(Operand); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); + emitRRX(Mips::ORi, RegNo, RegNo, Operand, IDLoc, Instructions); } template <unsigned ShiftAmount> @@ -1818,12 +1840,22 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, return true; } + if (Is32BitImm) { + if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) { + // Sign extend up to 64-bit so that the predicates match the hardware + // behaviour. In particular, isInt<16>(0xffff8000) and similar should be + // true. + ImmValue = SignExtend64<32>(ImmValue); + } else { + Error(IDLoc, "instruction requires a 32-bit immediate"); + return true; + } + } + bool UseSrcReg = false; if (SrcReg != Mips::NoRegister) UseSrcReg = true; - MCInst tmpInst; - unsigned TmpReg = DstReg; if (UseSrcReg && (DstReg == SrcReg)) { // At this point we need AT to perform the expansions and we exit if it is @@ -1834,29 +1866,26 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, TmpReg = ATReg; } - tmpInst.setLoc(IDLoc); // FIXME: gas has a special case for values that are 000...1111, which // becomes a li -1 and then a dsrl - if (0 <= ImmValue && ImmValue <= 65535) { - // For unsigned and positive signed 16-bit values (0 <= j <= 65535): - // li d,j => ori d,$zero,j - if (!UseSrcReg) - SrcReg = isGP64bit() ? 
Mips::ZERO_64 : Mips::ZERO; - tmpInst.setOpcode(Mips::ORi); - tmpInst.addOperand(MCOperand::createReg(DstReg)); - tmpInst.addOperand(MCOperand::createReg(SrcReg)); - tmpInst.addOperand(MCOperand::createImm(ImmValue)); - Instructions.push_back(tmpInst); - } else if (ImmValue < 0 && ImmValue >= -32768) { - // For negative signed 16-bit values (-32768 <= j < 0): + if (isInt<16>(ImmValue)) { // li d,j => addiu d,$zero,j if (!UseSrcReg) SrcReg = Mips::ZERO; - tmpInst.setOpcode(Mips::ADDiu); - tmpInst.addOperand(MCOperand::createReg(DstReg)); - tmpInst.addOperand(MCOperand::createReg(SrcReg)); - tmpInst.addOperand(MCOperand::createImm(ImmValue)); - Instructions.push_back(tmpInst); + emitRRI(Mips::ADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions); + } else if (isUInt<16>(ImmValue)) { + // li d,j => ori d,$zero,j + unsigned TmpReg = DstReg; + if (SrcReg == DstReg) { + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + TmpReg = ATReg; + } + + emitRRI(Mips::ORi, TmpReg, Mips::ZERO, ImmValue, IDLoc, Instructions); + if (UseSrcReg) + emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions); } else if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) { warnIfNoMacro(IDLoc); @@ -1869,30 +1898,16 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, if (!Is32BitImm && !isInt<32>(ImmValue)) { // For DLI, expand to an ORi instead of a LUi to avoid sign-extending the // upper 32 bits. - tmpInst.setOpcode(Mips::ORi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createReg(Mips::ZERO)); - tmpInst.addOperand(MCOperand::createImm(Bits31To16)); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); - // Move the value to the upper 16 bits by doing a 16-bit left shift. - createLShiftOri<16>(0, TmpReg, IDLoc, Instructions); - } else { - tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createImm(Bits31To16)); - Instructions.push_back(tmpInst); - } + emitRRI(Mips::ORi, TmpReg, Mips::ZERO, Bits31To16, IDLoc, Instructions); + emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, Instructions); + } else + emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions); createLShiftOri<0>(Bits15To0, TmpReg, IDLoc, Instructions); if (UseSrcReg) createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); } else if ((ImmValue & (0xffffLL << 48)) == 0) { - if (Is32BitImm) { - Error(IDLoc, "instruction requires a 32-bit immediate"); - return true; - } warnIfNoMacro(IDLoc); // <------- lo32 ------> @@ -1912,10 +1927,7 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff; uint16_t Bits15To0 = ImmValue & 0xffff; - tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createImm(Bits47To32)); - Instructions.push_back(tmpInst); + emitRI(Mips::LUi, TmpReg, Bits47To32, IDLoc, Instructions); createLShiftOri<0>(Bits31To16, TmpReg, IDLoc, Instructions); createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions); @@ -1923,10 +1935,6 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); } else { - if (Is32BitImm) { - Error(IDLoc, "instruction requires a 32-bit immediate"); - return true; - } warnIfNoMacro(IDLoc); // <------- hi32 ------> <------- lo32 ------> @@ -1948,10 +1956,7 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg, uint16_t Bits31To16 = (ImmValue >> 16) & 
0xffff; uint16_t Bits15To0 = ImmValue & 0xffff; - tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createImm(Bits63To48)); - Instructions.push_back(tmpInst); + emitRI(Mips::LUi, TmpReg, Bits63To48, IDLoc, Instructions); createLShiftOri<0>(Bits47To32, TmpReg, IDLoc, Instructions); // When Bits31To16 is 0, do a left shift of 32 bits instead of doing @@ -2096,8 +2101,8 @@ bool MipsAsmParser::loadAndAddSymbolAddress( tmpInst.addOperand(MCOperand::createExpr(HiExpr)); Instructions.push_back(tmpInst); - createLShiftOri<0>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(), - Instructions); + emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), SMLoc(), + Instructions); } if (UseSrcReg) @@ -2708,12 +2713,8 @@ void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc, void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, SmallVectorImpl<MCInst> &Instructions) { - MCInst AdduInst; - AdduInst.setOpcode(Is64Bit ? Mips::DADDu : Mips::ADDu); - AdduInst.addOperand(MCOperand::createReg(DstReg)); - AdduInst.addOperand(MCOperand::createReg(SrcReg)); - AdduInst.addOperand(MCOperand::createReg(TrgReg)); - Instructions.push_back(AdduInst); + emitRRR(Is64Bit ? Mips::DADDu : Mips::ADDu, DstReg, SrcReg, TrgReg, SMLoc(), + Instructions); } unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 36ba8e559e0b..bde843afd3d2 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -46,7 +46,6 @@ add_llvm_target(MipsCodeGen MipsSubtarget.cpp MipsTargetMachine.cpp MipsTargetObjectFile.cpp - MipsSelectionDAGInfo.cpp ) add_subdirectory(InstPrinter) diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 9bdf8235a2b4..949ee1474f96 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -59,7 +59,7 @@ static MCInstrInfo *createMipsMCInstrInfo() { return X; } -static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createMipsMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitMipsMCRegisterInfo(X, Mips::RA); return X; @@ -68,9 +68,7 @@ static MCRegisterInfo *createMipsMCRegisterInfo(StringRef TT) { static MCSubtargetInfo *createMipsMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { CPU = MIPS_MC::selectMipsCPU(TT, CPU); - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitMipsMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createMipsMCSubtargetInfoImpl(TT, CPU, FS); } static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, @@ -84,7 +82,7 @@ static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createMipsMCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index db2a924a99f9..46cc99c62393 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -152,18 +152,19 @@ Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { return isInt<15>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects(); } -void 
Mips16FrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void Mips16FrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); const Mips16InstrInfo &TII = *static_cast<const Mips16InstrInfo *>(STI.getInstrInfo()); const MipsRegisterInfo &RI = TII.getRegisterInfo(); const BitVector Reserved = RI.getReservedRegs(MF); bool SaveS2 = Reserved[Mips::S2]; if (SaveS2) - MF.getRegInfo().setPhysRegUsed(Mips::S2); + SavedRegs.set(Mips::S2); if (hasFP(MF)) - MF.getRegInfo().setPhysRegUsed(Mips::S0); + SavedRegs.set(Mips::S0); } const MipsFrameLowering * diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index f281c927c1c4..b48ed4641ea7 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -38,8 +38,8 @@ public: bool hasReservedCallFrame(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; }; } // End llvm namespace diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 7b6a2a154471..bce2c1eb4485 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -120,13 +120,13 @@ void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() { unsigned Mips16SPAliasReg = MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); - return CurDAG->getRegister(Mips16SPAliasReg, - getTargetLowering()->getPointerTy()); + auto PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout()); + return CurDAG->getRegister(Mips16SPAliasReg, PtrVT); } void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { - SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, - getTargetLowering()->getPointerTy()); + auto PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout()); + SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, PtrVT); if (Parent) { switch (Parent->getOpcode()) { case ISD::LOAD: { @@ -155,7 +155,7 @@ void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { } } } - AliasReg = CurDAG->getRegister(Mips::SP, getTargetLowering()->getPointerTy()); + AliasReg = CurDAG->getRegister(Mips::SP, PtrVT); return; } diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 846e3c964f44..3522cbb1f36a 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -502,7 +502,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops, unsigned V0Reg = Mips::V0; if (NeedMips16Helper) { RegsToPass.push_front(std::make_pair(V0Reg, Callee)); - JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); + JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, + getPointerTy(DAG.getDataLayout())); ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget); JumpTarget = getAddrGlobal(S, CLI.DL, JumpTarget.getValueType(), DAG, MipsII::MO_GOT, Chain, diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index c2651b82d285..e2f6fcc17726 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -267,7 +267,7 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, 
MVT RetVT, } unsigned MipsFastISel::fastMaterializeAlloca(const AllocaInst *AI) { - assert(TLI.getValueType(AI->getType(), true) == MVT::i32 && + assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i32 && "Alloca should always return a pointer."); DenseMap<const AllocaInst *, int>::iterator SI = @@ -382,7 +382,7 @@ unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) { // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) @@ -507,12 +507,13 @@ bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { break; case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return computeCallAddress(U->getOperand(0), Addr); break; } @@ -532,7 +533,7 @@ bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) { } bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT evt = TLI.getValueType(Ty, true); + EVT evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; @@ -931,8 +932,8 @@ bool MipsFastISel::selectFPExt(const Instruction *I) { if (UnsupportedFPMode) return false; Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f32 || DestVT != MVT::f64) return false; @@ -998,8 +999,8 @@ bool MipsFastISel::selectFPTrunc(const Instruction *I) { if (UnsupportedFPMode) return false; Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f64 || DestVT != MVT::f32) return false; @@ -1415,7 +1416,8 @@ bool MipsFastISel::selectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { CallingConv::ID CC = F.getCallingConv(); SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); + // Analyze operands of the call, assigning locations to each operand. 
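// --- Illustrative aside, not part of the original patch -------------------
// A recurring pattern in the MipsFastISel hunks above: the type-query
// helpers now take the module's DataLayout explicitly instead of reading
// target-cached state. Using the fast-isel's existing DL and TLI members
// (both already present in this file), the new call shape is roughly:
//
//   EVT RVEVT = TLI.getValueType(DL, RV->getType());  // was getValueType(Ty)
//   MVT PtrVT = TLI.getPointerTy(DL);                 // was getPointerTy()
//
// Only the extra DataLayout argument changes; the returned types should be
// the same as before.
// ---------------------------------------------------------------------------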
SmallVector<CCValAssign, 16> ValLocs; MipsCCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, @@ -1449,7 +1451,7 @@ bool MipsFastISel::selectRet(const Instruction *I) { if (!MRI.getRegClass(SrcReg)->contains(DestReg)) return false; - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; @@ -1493,8 +1495,8 @@ bool MipsFastISel::selectTrunc(const Instruction *I) { Value *Op = I->getOperand(0); EVT SrcVT, DestVT; - SrcVT = TLI.getValueType(Op->getType(), true); - DestVT = TLI.getValueType(I->getType(), true); + SrcVT = TLI.getValueType(DL, Op->getType(), true); + DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; @@ -1521,8 +1523,8 @@ bool MipsFastISel::selectIntExt(const Instruction *I) { return false; EVT SrcEVT, DestEVT; - SrcEVT = TLI.getValueType(SrcTy, true); - DestEVT = TLI.getValueType(DestTy, true); + SrcEVT = TLI.getValueType(DL, SrcTy, true); + DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) @@ -1620,7 +1622,7 @@ unsigned MipsFastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, } bool MipsFastISel::selectDivRem(const Instruction *I, unsigned ISDOpcode) { - EVT DestEVT = TLI.getValueType(I->getType(), true); + EVT DestEVT = TLI.getValueType(DL, I->getType(), true); if (!DestEVT.isSimple()) return false; @@ -1685,7 +1687,7 @@ bool MipsFastISel::selectShift(const Instruction *I) { if (!TempReg) return false; - MVT Op0MVT = TLI.getValueType(Op0->getType(), true).getSimpleVT(); + MVT Op0MVT = TLI.getValueType(DL, Op0->getType(), true).getSimpleVT(); bool IsZExt = Opcode == Instruction::LShr; if (!emitIntExt(Op0MVT, Op0Reg, MVT::i32, TempReg, IsZExt)) return false; @@ -1803,7 +1805,7 @@ unsigned MipsFastISel::getRegEnsuringSimpleIntegerWidening(const Value *V, unsigned VReg = getRegForValue(V); if (VReg == 0) return 0; - MVT VMVT = TLI.getValueType(V->getType(), true).getSimpleVT(); + MVT VMVT = TLI.getValueType(DL, V->getType(), true).getSimpleVT(); if ((VMVT == MVT::i8) || (VMVT == MVT::i16)) { unsigned TempReg = createResultReg(&Mips::GPR32RegClass); if (!emitIntExt(VMVT, VReg, MVT::i32, TempReg, IsUnsigned)) diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 2c9868ac051d..06502397b6b8 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -59,8 +59,9 @@ bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { /// GOT address into a register. 
SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg(); - return CurDAG->getRegister(GlobalBaseReg, - getTargetLowering()->getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, getTargetLowering()->getPointerTy( + CurDAG->getDataLayout())) + .getNode(); } /// ComplexPattern used on MipsInstrInfo diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 67ddcc4dacb9..fbebb9abb4cc 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -466,7 +466,8 @@ MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, return Mips::createFastISel(funcInfo, libInfo); } -EVT MipsTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT MipsTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -1579,9 +1580,10 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); SDLoc DL(Op); - EVT PTy = getPointerTy(); + auto &TD = DAG.getDataLayout(); + EVT PTy = getPointerTy(TD); unsigned EntrySize = - DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(*getDataLayout()); + DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); Index = DAG.getNode(ISD::MUL, DL, PTy, Index, DAG.getConstant(EntrySize, DL, PTy)); @@ -1647,10 +1649,10 @@ lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT Ty = Op.getOperand(0).getValueType(); - SDValue Cond = DAG.getNode(ISD::SETCC, DL, - getSetCCResultType(*DAG.getContext(), Ty), - Op.getOperand(0), Op.getOperand(1), - Op.getOperand(4)); + SDValue Cond = + DAG.getNode(ISD::SETCC, DL, getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), Ty), + Op.getOperand(0), Op.getOperand(1), Op.getOperand(4)); return DAG.getNode(ISD::SELECT, DL, Op.getValueType(), Cond, Op.getOperand(2), Op.getOperand(3)); @@ -1723,7 +1725,7 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = getTargetMachine().getTLSModel(GV); @@ -1831,7 +1833,7 @@ SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), - getPointerTy()); + getPointerTy(MF.getDataLayout())); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. @@ -1850,9 +1852,9 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Node); unsigned ArgSlotSizeInBytes = (ABI.IsN32() || ABI.IsN64()) ? 8 : 4; - SDValue VAListLoad = DAG.getLoad(getPointerTy(), DL, Chain, VAListPtr, - MachinePointerInfo(SV), false, false, false, - 0); + SDValue VAListLoad = + DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, Chain, VAListPtr, + MachinePointerInfo(SV), false, false, false, 0); SDValue VAList = VAListLoad; // Re-align the pointer if necessary. @@ -1874,7 +1876,9 @@ SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { } // Increment the pointer, VAList, to the next vaarg. 
- unsigned ArgSizeInBytes = getDataLayout()->getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); + auto &TD = DAG.getDataLayout(); + unsigned ArgSizeInBytes = + TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); SDValue Tmp3 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, DAG.getConstant(RoundUpToAlignment(ArgSizeInBytes, ArgSlotSizeInBytes), @@ -2062,7 +2066,7 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1)); return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain, DAG.getRegister(OffsetReg, Ty), - DAG.getRegister(AddrReg, getPointerTy()), + DAG.getRegister(AddrReg, getPointerTy(MF.getDataLayout())), Chain.getValue(1)); } @@ -2479,15 +2483,16 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, SDValue Arg, SDLoc DL, bool IsTailCall, SelectionDAG &DAG) const { if (!IsTailCall) { - SDValue PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, - DAG.getIntPtrConstant(Offset, DL)); + SDValue PtrOff = + DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr, + DAG.getIntPtrConstant(Offset, DL)); return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo(), false, false, 0); } MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); int FI = MFI->CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), /*isVolatile=*/ true, false, 0); } @@ -2611,8 +2616,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!IsTailCall) Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal, DL); - SDValue StackPtr = DAG.getCopyFromReg( - Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP, getPointerTy()); + SDValue StackPtr = + DAG.getCopyFromReg(Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP, + getPointerTy(DAG.getDataLayout())); // With EABI is it possible to have 16 args on registers. 
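// --- Illustrative aside, not part of the original patch -------------------
// In the SelectionDAG lowering code the same migration applies, but the
// DataLayout is reached through the DAG rather than through a member, e.g.
// (pattern taken from the surrounding hunks):
//
//   EVT PtrVT = getPointerTy(DAG.getDataLayout());
//   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
//
// Several hunks in this patch hoist PtrVT (or the DataLayout reference)
// into a local so the lookup is not repeated at every use.
// ---------------------------------------------------------------------------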
std::deque< std::pair<unsigned, SDValue> > RegsToPass; @@ -2750,7 +2756,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, IsCallReloc = true; } } else - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0, + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, + getPointerTy(DAG.getDataLayout()), 0, MipsII::MO_NO_FLAG); GlobalOrExternal = true; } @@ -2758,8 +2765,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const char *Sym = S->getSymbol(); if (!ABI.IsN64() && !IsPIC) // !N64 && static - Callee = - DAG.getTargetExternalSymbol(Sym, getPointerTy(), MipsII::MO_NO_FLAG); + Callee = DAG.getTargetExternalSymbol( + Sym, getPointerTy(DAG.getDataLayout()), MipsII::MO_NO_FLAG); else if (LargeGOT) { Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, @@ -3029,7 +3036,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -3174,12 +3181,13 @@ MipsTargetLowering::LowerReturn(SDValue Chain, if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); - SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); + SDValue Val = + DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout())); unsigned V0 = ABI.IsN64() ? Mips::V0_64 : Mips::V0; Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag); Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(V0, getPointerTy())); + RetOps.push_back(DAG.getRegister(V0, getPointerTy(DAG.getDataLayout()))); } RetOps[0] = Chain; // Update chain. @@ -3198,9 +3206,8 @@ MipsTargetLowering::LowerReturn(SDValue Chain, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. -MipsTargetLowering::ConstraintType MipsTargetLowering:: -getConstraintType(const std::string &Constraint) const -{ +MipsTargetLowering::ConstraintType +MipsTargetLowering::getConstraintType(StringRef Constraint) const { // Mips specific constraints // GCC config/mips/constraints.md // @@ -3290,9 +3297,8 @@ MipsTargetLowering::getSingleConstraintMatchWeight( /// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag /// that is returned indicates whether parsing was successful. The second flag /// is true if the numeric part exists. -static std::pair<bool, bool> -parsePhysicalReg(StringRef C, std::string &Prefix, - unsigned long long &Reg) { +static std::pair<bool, bool> parsePhysicalReg(StringRef C, StringRef &Prefix, + unsigned long long &Reg) { if (C.front() != '{' || C.back() != '}') return std::make_pair(false, false); @@ -3300,7 +3306,7 @@ parsePhysicalReg(StringRef C, std::string &Prefix, StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; I = std::find_if(B, E, std::ptr_fun(isdigit)); - Prefix.assign(B, I - B); + Prefix = StringRef(B, I - B); // The second flag is set to false if no numeric characters were found. 
if (I == E) @@ -3316,7 +3322,7 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const TargetRegisterClass *RC; - std::string Prefix; + StringRef Prefix; unsigned long long Reg; std::pair<bool, bool> R = parsePhysicalReg(C, Prefix, Reg); @@ -3332,7 +3338,7 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { RC = TRI->getRegClass(Prefix == "hi" ? Mips::HI32RegClassID : Mips::LO32RegClassID); return std::make_pair(*(RC->begin()), RC); - } else if (Prefix.compare(0, 4, "$msa") == 0) { + } else if (Prefix.startswith("$msa")) { // Parse $msa(ir|csr|access|save|modify|request|map|unmap) // No numeric characters follow the name. @@ -3390,7 +3396,7 @@ parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { /// pointer. std::pair<unsigned, const TargetRegisterClass *> MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { @@ -3546,8 +3552,8 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op, TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -bool MipsTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // No global is ever allowed as a base. if (AM.BaseGV) @@ -3625,7 +3631,7 @@ void MipsTargetLowering::copyByValRegs( FrameObjOffset = VA.getLocMemOffset(); // Create frame object. - EVT PtrTy = getPointerTy(); + EVT PtrTy = getPointerTy(DAG.getDataLayout()); int FI = MFI->CreateFixedObject(FrameObjSize, FrameObjOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); InVals.push_back(FIN); @@ -3662,7 +3668,8 @@ void MipsTargetLowering::passByValArg( unsigned OffsetInBytes = 0; // From beginning of struct unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes); - EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); + EVT PtrTy = getPointerTy(DAG.getDataLayout()), + RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); unsigned NumRegs = LastReg - FirstReg; if (NumRegs) { @@ -3787,7 +3794,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains, unsigned Reg = addLiveIn(MF, ArgRegs[I], RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true); - SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo(), false, false, 0); cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue( @@ -3920,8 +3927,8 @@ MipsTargetLowering::emitPseudoSELECT(MachineInstr *MI, MachineBasicBlock *BB, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned MipsTargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned MipsTargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { // Named registers is expected to be fairly rare. For now, just support $28 // since the linux kernel uses it. 
if (Subtarget.isGP64bit()) { diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index bc9a1ce64097..6fe8f830d35d 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -227,7 +227,9 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, @@ -247,7 +249,8 @@ namespace llvm { const char *getTargetNodeName(unsigned Opcode) const override; /// getSetCCResultType - get the ISD::SETCC result ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; @@ -263,7 +266,8 @@ namespace llvm { void HandleByVal(CCState *, unsigned &, unsigned) const override; - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; protected: SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; @@ -478,8 +482,7 @@ namespace llvm { bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; // Inline asm support - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. @@ -493,8 +496,7 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -505,8 +507,8 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "R") return InlineAsm::Constraint_R; else if (ConstraintCode == "ZC") @@ -514,8 +516,8 @@ namespace llvm { return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index ec7bf314c641..096b3bee5d07 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -621,10 +621,17 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { !MFI->hasVarSizedObjects(); } -void MipsSEFrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); +/// Mark \p Reg and all registers aliasing it in the bitset. 
+void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, unsigned Reg) { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + SavedRegs.set(*AI); +} + +void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); MipsABIInfo ABI = STI.getABI(); unsigned FP = ABI.GetFramePtr(); @@ -632,10 +639,10 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Mark $fp as used if function has dedicated frame pointer. if (hasFP(MF)) - MRI.setPhysRegUsed(FP); + setAliasRegs(MF, SavedRegs, FP); // Mark $s7 as used if function has dedicated base pointer. if (hasBP(MF)) - MRI.setPhysRegUsed(BP); + setAliasRegs(MF, SavedRegs, BP); // Create spill slots for eh data registers if function calls eh_return. if (MipsFI->callsEhReturn()) diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 2fcd6bbb9a15..9cb32e6c7829 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -34,8 +34,8 @@ public: bool hasReservedCallFrame(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; unsigned ehDataReg(unsigned I) const; }; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 990a2f8d8c85..cb46d731da29 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -841,7 +841,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { } case MipsISD::ThreadPointer: { - EVT PtrVT = getTargetLowering()->getPointerTy(); + EVT PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout()); unsigned RdhwrOpc, DestReg; if (PtrVT == MVT::i32) { diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index ae2837a8582c..b319fd07884b 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -838,8 +838,9 @@ static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) if (!VT.isVector()) - return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), - VT, TL->getScalarShiftAmountTy(VT), DAG); + return genConstMult(N->getOperand(0), C->getZExtValue(), SDLoc(N), VT, + TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), + DAG); return SDValue(N, 0); } diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp deleted file mode 100644 index edd8f670707f..000000000000 --- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the MipsSelectionDAGInfo class. 
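// --- Illustrative aside, not part of the original patch -------------------
// MipsSelectionDAGInfo added no Mips-specific behaviour beyond forwarding a
// DataLayout that the base class no longer needs, so the .cpp/.h pair is
// deleted here (and dropped from CMakeLists.txt earlier in this diff). The
// subtarget now holds the base class directly, roughly:
//
//   const TargetSelectionDAGInfo TSInfo;  // was: const MipsSelectionDAGInfo
//
// as shown in the MipsSubtarget.h hunk further below.
// ---------------------------------------------------------------------------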
-// -//===----------------------------------------------------------------------===// - -#include "MipsTargetMachine.h" -using namespace llvm; - -#define DEBUG_TYPE "mips-selectiondag-info" - -MipsSelectionDAGInfo::MipsSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -MipsSelectionDAGInfo::~MipsSelectionDAGInfo() { -} diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h deleted file mode 100644 index 061423fbeb86..000000000000 --- a/lib/Target/Mips/MipsSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the Mips subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_MIPS_MIPSSELECTIONDAGINFO_H -#define LLVM_LIB_TARGET_MIPS_MIPSSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class MipsTargetMachine; - -class MipsSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit MipsSelectionDAGInfo(const DataLayout &DL); - ~MipsSelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index c41bb16a58ea..471b6e19a8bb 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -70,7 +70,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU, HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), - HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(*TM.getDataLayout()), + HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(), InstrInfo( MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 5f9296812e1c..1db8881404c9 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -18,10 +18,10 @@ #include "MipsFrameLowering.h" #include "MipsISelLowering.h" #include "MipsInstrInfo.h" -#include "MipsSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -140,7 +140,7 @@ class MipsSubtarget : public MipsGenSubtargetInfo { Triple TargetTriple; - const MipsSelectionDAGInfo TSInfo; + const TargetSelectionDAGInfo TSInfo; std::unique_ptr<const MipsInstrInfo> InstrInfo; std::unique_ptr<const MipsFrameLowering> FrameLowering; std::unique_ptr<const MipsTargetLowering> TLInfo; @@ -275,7 +275,7 @@ public: void setHelperClassesMips16(); void setHelperClassesMipsSE(); - const MipsSelectionDAGInfo *getSelectionDAGInfo() const override { + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } const MipsInstrInfo *getInstrInfo() const override { return InstrInfo.get(); } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index c820668befa0..1c77745d130b 100644 --- 
a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -62,7 +62,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU, if (!ABI.IsN64()) Ret += "-p:32:32"; - // 8 and 16 bit integers only need no have natural alignment, but try to + // 8 and 16 bit integers only need to have natural alignment, but try to // align them to 32 bits. 64 bit integers have natural alignment. Ret += "-i8:8:32-i16:16:32-i64:64"; @@ -237,7 +237,7 @@ TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() { if (Subtarget->allowMixed16_32()) { DEBUG(errs() << "No Target Transform Info Pass Added\n"); // FIXME: This is no longer necessary as the TTI returned is per-function. - return TargetTransformInfo(getDataLayout()); + return TargetTransformInfo(F.getParent()->getDataLayout()); } DEBUG(errs() << "Target Transform Info Pass Added\n"); diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 221d2f093aeb..ad7302037cad 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -37,7 +37,7 @@ static MCInstrInfo *createNVPTXMCInstrInfo() { return X; } -static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createNVPTXMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); // PTX does not have a return address register. InitNVPTXMCRegisterInfo(X, 0); @@ -46,13 +46,13 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createNVPTXMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitNVPTXMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createNVPTXMCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createNVPTXMCCodeGenInfo( - StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { +static MCCodeGenInfo *createNVPTXMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); // The default relocation model is used regardless of what the client has diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index cadd7a46cd9d..ecb0f0a1d0a1 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -340,7 +340,7 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { } void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); Type *Ty = F->getReturnType(); @@ -366,20 +366,20 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { O << ".param .b" << size << " func_retval0"; } else if (isa<PointerType>(Ty)) { - O << ".param .b" << TLI->getPointerTy().getSizeInBits() + O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits() << " func_retval0"; } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { - unsigned totalsz = TD->getTypeAllocSize(Ty); + unsigned totalsz = DL.getTypeAllocSize(Ty); unsigned retAlignment = 0; if (!llvm::getAlign(*F, 0, retAlignment)) - retAlignment = TD->getABITypeAlignment(Ty); + retAlignment = DL.getABITypeAlignment(Ty); O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz << "]"; } else llvm_unreachable("Unknown return type"); } else { 
SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*TLI, Ty, vtparts); + ComputeValueVTs(*TLI, DL, Ty, vtparts); unsigned idx = 0; for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; @@ -1433,7 +1433,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { bool first = true; bool isKernelFunc = llvm::isKernelFunction(*F); bool isABI = (nvptxSubtarget->getSmVersion() >= 20); - MVT thePointerTy = TLI->getPointerTy(); + MVT thePointerTy = TLI->getPointerTy(*TD); O << "(\n"; @@ -1579,7 +1579,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Further, if a part is vector, print the above for // each vector element. SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*TLI, ETy, vtparts); + ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts); for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 09e0bd5d3d88..b75cf4040312 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -80,14 +80,14 @@ static bool IsPTXVectorType(MVT VT) { /// NOTE: This is a band-aid for code that expects ComputeValueVTs to return the /// same number of types as the Ins/Outs arrays in LowerFormalArguments, /// LowerCall, and LowerReturn. -static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, - SmallVectorImpl<EVT> &ValueVTs, +static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl<EVT> &ValueVTs, SmallVectorImpl<uint64_t> *Offsets = nullptr, uint64_t StartingOffset = 0) { SmallVector<EVT, 16> TempVTs; SmallVector<uint64_t, 16> TempOffsets; - ComputeValueVTs(TLI, Ty, TempVTs, &TempOffsets, StartingOffset); + ComputeValueVTs(TLI, DL, Ty, TempVTs, &TempOffsets, StartingOffset); for (unsigned i = 0, e = TempVTs.size(); i != e; ++i) { EVT VT = TempVTs[i]; uint64_t Off = TempOffsets[i]; @@ -885,15 +885,16 @@ SDValue NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); - return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + Op = DAG.getTargetGlobalAddress(GV, dl, PtrVT); + return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op); } -std::string -NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, - const SmallVectorImpl<ISD::OutputArg> &Outs, - unsigned retAlignment, - const ImmutableCallSite *CS) const { +std::string NVPTXTargetLowering::getPrototype( + const DataLayout &DL, Type *retTy, const ArgListTy &Args, + const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment, + const ImmutableCallSite *CS) const { + auto PtrVT = getPointerTy(DL); bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); @@ -921,13 +922,12 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, O << ".param .b" << size << " _"; } else if (isa<PointerType>(retTy)) { - O << ".param .b" << getPointerTy().getSizeInBits() << " _"; + O << ".param .b" << PtrVT.getSizeInBits() << " _"; } else if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { - O << ".param .align " - << retAlignment - << " .b8 _[" - << getDataLayout()->getTypeAllocSize(retTy) << "]"; + auto &DL = 
CS->getCalledFunction()->getParent()->getDataLayout(); + O << ".param .align " << retAlignment << " .b8 _[" + << DL.getTypeAllocSize(retTy) << "]"; } else { llvm_unreachable("Unknown return type"); } @@ -936,7 +936,6 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, O << "_ ("; bool first = true; - MVT thePointerTy = getPointerTy(); unsigned OIdx = 0; for (unsigned i = 0, e = Args.size(); i != e; ++i, ++OIdx) { @@ -950,24 +949,23 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, if (Ty->isAggregateType() || Ty->isVectorTy()) { unsigned align = 0; const CallInst *CallI = cast<CallInst>(CS->getInstruction()); - const DataLayout *TD = getDataLayout(); // +1 because index 0 is reserved for return type alignment if (!llvm::getAlign(*CallI, i + 1, align)) - align = TD->getABITypeAlignment(Ty); - unsigned sz = TD->getTypeAllocSize(Ty); + align = DL.getABITypeAlignment(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; // update the index for Outs SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*this, Ty, vtparts); + ComputeValueVTs(*this, DL, Ty, vtparts); if (unsigned len = vtparts.size()) OIdx += len - 1; continue; } // i8 types in IR will be i16 types in SDAG - assert((getValueType(Ty) == Outs[OIdx].VT || - (getValueType(Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && + assert((getValueType(DL, Ty) == Outs[OIdx].VT || + (getValueType(DL, Ty) == MVT::i8 && Outs[OIdx].VT == MVT::i16)) && "type mismatch between callee prototype and arguments"); // scalar type unsigned sz = 0; @@ -976,7 +974,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, if (sz < 32) sz = 32; } else if (isa<PointerType>(Ty)) - sz = thePointerTy.getSizeInBits(); + sz = PtrVT.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); O << ".param .b" << sz << " "; @@ -988,7 +986,7 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, Type *ETy = PTy->getElementType(); unsigned align = Outs[OIdx].Flags.getByValAlign(); - unsigned sz = getDataLayout()->getTypeAllocSize(ETy); + unsigned sz = DL.getTypeAllocSize(ETy); O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; @@ -1002,7 +1000,6 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS, Type *Ty, unsigned Idx) const { - const DataLayout *TD = getDataLayout(); unsigned Align = 0; const Value *DirectCallee = CS->getCalledFunction(); @@ -1043,7 +1040,8 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee, // Call is indirect or alignment information is not available, fall back to // the ABI type alignment - return TD->getABITypeAlignment(Ty); + auto &DL = CS->getCaller()->getParent()->getDataLayout(); + return DL.getABITypeAlignment(Ty); } SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, @@ -1064,9 +1062,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(isABI && "Non-ABI compilation is not supported"); if (!isABI) return Chain; - const DataLayout *TD = getDataLayout(); MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); + auto &DL = MF.getDataLayout(); SDValue tempChain = Chain; Chain = DAG.getCALLSEQ_START(Chain, @@ -1096,11 +1094,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // aggregate SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0); + 
ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets, + 0); unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); // declare .param .align <align> .b8 .param<n>[<size>]; - unsigned sz = TD->getTypeAllocSize(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl, MVT::i32), @@ -1137,10 +1136,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, continue; } if (Ty->isVectorTy()) { - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); // declare .param .align <align> .b8 .param<n>[<size>]; - unsigned sz = TD->getTypeAllocSize(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(align, dl, MVT::i32), @@ -1321,7 +1320,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<uint64_t, 16> Offsets; const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); assert(PTy && "Type of a byval parameter should be pointer"); - ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(), + vtparts, &Offsets, 0); // declare .param .align <align> .b8 .param<n>[<size>]; unsigned sz = Outs[OIdx].Flags.getByValSize(); @@ -1342,9 +1342,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT elemtype = vtparts[j]; int curOffset = Offsets[j]; unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); - SDValue srcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], - DAG.getConstant(curOffset, dl, getPointerTy())); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue srcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, OutVals[OIdx], + DAG.getConstant(curOffset, dl, PtrVT)); SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(), false, false, false, PartAlign); @@ -1371,12 +1371,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Handle Result if (Ins.size() > 0) { SmallVector<EVT, 16> resvtparts; - ComputeValueVTs(*this, retTy, resvtparts); + ComputeValueVTs(*this, DL, retTy, resvtparts); // Declare // .param .align 16 .b8 retval0[<size-in-bytes>], or // .param .b<size-in-bits> retval0 - unsigned resultsz = TD->getTypeAllocSizeInBits(retTy); + unsigned resultsz = DL.getTypeAllocSizeInBits(retTy); // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for // these three types to match the logic in // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype. @@ -1415,7 +1415,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The prototype is embedded in a string and put as the operand for a // CallPrototype SDNode which will print out to the value of the string. 
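// --- Illustrative aside, not part of the original patch -------------------
// NVPTXTargetLowering::getPrototype() now receives the DataLayout
// explicitly, so its call site becomes (as in the hunk that follows):
//
//   std::string Proto =
//       getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS);
//
// Inside getPrototype the former getDataLayout()/thePointerTy locals are
// replaced by the passed-in DL and getPointerTy(DL).
// ---------------------------------------------------------------------------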
SDVTList ProtoVTs = DAG.getVTList(MVT::Other, MVT::Glue); - std::string Proto = getPrototype(retTy, Args, Outs, retAlignment, CS); + std::string Proto = + getPrototype(DAG.getDataLayout(), retTy, Args, Outs, retAlignment, CS); const char *ProtoStr = nvTM->getManagedStrPool()->getManagedString(Proto.c_str())->c_str(); SDValue ProtoOps[] = { @@ -1477,7 +1478,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Generate loads from param memory/moves from registers for result if (Ins.size() > 0) { if (retTy && retTy->isVectorTy()) { - EVT ObjectVT = getValueType(retTy); + EVT ObjectVT = getValueType(DL, retTy); unsigned NumElts = ObjectVT.getVectorNumElements(); EVT EltVT = ObjectVT.getVectorElementType(); assert(STI.getTargetLowering()->getNumRegisters(F->getContext(), @@ -1590,13 +1591,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Elt = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); InVals.push_back(Elt); } - Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); } } } else { SmallVector<EVT, 16> VTs; SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0); assert(VTs.size() == Ins.size() && "Bad value decomposition"); unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0); for (unsigned i = 0, e = Ins.size(); i != e; ++i) { @@ -1608,8 +1609,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<EVT, 4> LoadRetVTs; EVT TheLoadType = VTs[i]; - if (retTy->isIntegerTy() && - TD->getTypeAllocSizeInBits(retTy) < 32) { + if (retTy->isIntegerTy() && DL.getTypeAllocSizeInBits(retTy) < 32) { // This is for integer types only, and specifically not for // aggregates. LoadRetVTs.push_back(MVT::i32); @@ -1920,11 +1920,11 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { } MemSDNode *MemSD = cast<MemSDNode>(N); - const DataLayout *TD = getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); unsigned Align = MemSD->getAlignment(); unsigned PrefAlign = - TD->getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); + TD.getPrefTypeAlignment(ValVT.getTypeForEVT(*DAG.getContext())); if (Align < PrefAlign) { // This store is not sufficiently aligned, so bail out and let this vector // store be scalarized. 
Note that we may still be able to emit smaller @@ -2064,7 +2064,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - const DataLayout *TD = getDataLayout(); + const DataLayout &DL = DAG.getDataLayout(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); const Function *F = MF.getFunction(); const AttributeSet &PAL = F->getAttributes(); @@ -2118,7 +2119,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (Ty->isAggregateType()) { SmallVector<EVT, 16> vtparts; - ComputePTXValueVTs(*this, Ty, vtparts); + ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts); assert(vtparts.size() > 0 && "empty aggregate type not expected"); for (unsigned parti = 0, parte = vtparts.size(); parti != parte; ++parti) { @@ -2130,7 +2131,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( continue; } if (Ty->isVectorTy()) { - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT); for (unsigned parti = 0; parti < NumRegs; ++parti) { InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); @@ -2156,13 +2157,14 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // NOTE: Here, we lose the ability to issue vector loads for vectors // that are a part of a struct. This should be investigated in the // future. - ComputePTXValueVTs(*this, Ty, vtparts, &offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &offsets, + 0); assert(vtparts.size() > 0 && "empty aggregate type not expected"); bool aggregateIsPacked = false; if (StructType *STy = llvm::dyn_cast<StructType>(Ty)) aggregateIsPacked = STy->isPacked(); - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); for (unsigned parti = 0, parte = vtparts.size(); parti != parte; ++parti) { EVT partVT = vtparts[parti]; @@ -2170,12 +2172,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( PointerType::get(partVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue srcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(offsets[parti], dl, getPointerTy())); - unsigned partAlign = - aggregateIsPacked ? 1 - : TD->getABITypeAlignment( - partVT.getTypeForEVT(F->getContext())); + DAG.getNode(ISD::ADD, dl, PtrVT, Arg, + DAG.getConstant(offsets[parti], dl, PtrVT)); + unsigned partAlign = aggregateIsPacked + ? 1 + : DL.getABITypeAlignment( + partVT.getTypeForEVT(F->getContext())); SDValue p; if (Ins[InsIdx].VT.getSizeInBits() > partVT.getSizeInBits()) { ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 
@@ -2198,8 +2200,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( continue; } if (Ty->isVectorTy()) { - EVT ObjectVT = getValueType(Ty); - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + EVT ObjectVT = getValueType(DL, Ty); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); unsigned NumElts = ObjectVT.getVectorNumElements(); assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts && "Vector was not scalarized"); @@ -2212,9 +2214,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Value *SrcValue = Constant::getNullValue(PointerType::get( EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue P = DAG.getLoad( - EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, - false, true, - TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); + EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, + true, + DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); if (P.getNode()) P.getNode()->setIROrder(idx + 1); @@ -2229,9 +2231,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Value *SrcValue = Constant::getNullValue(PointerType::get( VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue P = DAG.getLoad( - VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, - false, true, - TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); + VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue), false, false, + true, + DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); if (P.getNode()) P.getNode()->setIROrder(idx + 1); @@ -2269,13 +2271,12 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Value *SrcValue = Constant::getNullValue( PointerType::get(VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); - SDValue SrcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, - DAG.getConstant(Ofst, dl, getPointerTy())); + SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, + DAG.getConstant(Ofst, dl, PtrVT)); SDValue P = DAG.getLoad( VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false, false, true, - TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); + DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext()))); if (P.getNode()) P.getNode()->setIROrder(idx + 1); @@ -2288,7 +2289,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( Elt = DAG.getNode(ISD::ANY_EXTEND, dl, Ins[InsIdx].VT, Elt); InVals.push_back(Elt); } - Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + Ofst += DL.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); } InsIdx += NumElts; } @@ -2298,23 +2299,24 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( continue; } // A plain scalar. - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); // If ABI, load from the param symbol - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); Value *srcValue = Constant::getNullValue(PointerType::get( ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); SDValue p; if (ObjectVT.getSizeInBits() < Ins[InsIdx].VT.getSizeInBits()) { ISD::LoadExtType ExtOp = Ins[InsIdx].Flags.isSExt() ? 
ISD::SEXTLOAD : ISD::ZEXTLOAD; - p = DAG.getExtLoad(ExtOp, dl, Ins[InsIdx].VT, Root, Arg, - MachinePointerInfo(srcValue), ObjectVT, false, false, - false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + p = DAG.getExtLoad( + ExtOp, dl, Ins[InsIdx].VT, Root, Arg, MachinePointerInfo(srcValue), + ObjectVT, false, false, false, + DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); } else { - p = DAG.getLoad(Ins[InsIdx].VT, dl, Root, Arg, - MachinePointerInfo(srcValue), false, false, false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); + p = DAG.getLoad( + Ins[InsIdx].VT, dl, Root, Arg, MachinePointerInfo(srcValue), false, + false, false, + DL.getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); } if (p.getNode()) p.getNode()->setIROrder(idx + 1); @@ -2329,10 +2331,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // machine instruction fails because TargetExternalSymbol // (not lowered) is target dependent, and CopyToReg assumes // the source is lowered. - EVT ObjectVT = getValueType(Ty); + EVT ObjectVT = getValueType(DL, Ty); assert(ObjectVT == Ins[InsIdx].VT && "Ins type did not match function type"); - SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); + SDValue Arg = getParamSymbol(DAG, idx, PtrVT); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) p.getNode()->setIROrder(idx + 1); @@ -2370,7 +2372,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, MachineFunction &MF = DAG.getMachineFunction(); const Function *F = MF.getFunction(); Type *RetTy = F->getReturnType(); - const DataLayout *TD = getDataLayout(); + const DataLayout &TD = DAG.getDataLayout(); bool isABI = (STI.getSmVersion() >= 20); assert(isABI && "Non-ABI compilation is not supported"); @@ -2384,7 +2386,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, assert(NumElts == Outs.size() && "Bad scalarization of return value"); // const_cast can be removed in later LLVM versions - EVT EltVT = getValueType(RetTy).getVectorElementType(); + EVT EltVT = getValueType(TD, RetTy).getVectorElementType(); bool NeedExtend = false; if (EltVT.getSizeInBits() < 16) NeedExtend = true; @@ -2435,7 +2437,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); unsigned PerStoreOffset = - TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); + TD.getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); for (unsigned i = 0; i < NumElts; i += VecSize) { // Get values @@ -2493,7 +2495,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, } else { SmallVector<EVT, 16> ValVTs; SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0); + ComputePTXValueVTs(*this, DAG.getDataLayout(), RetTy, ValVTs, &Offsets, 0); assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); for (unsigned i = 0, e = Outs.size(); i != e; ++i) { @@ -2509,8 +2511,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, TheValType.getVectorElementType(), TmpVal, DAG.getIntPtrConstant(j, dl)); EVT TheStoreType = ValVTs[i]; - if (RetTy->isIntegerTy() && - TD->getTypeAllocSizeInBits(RetTy) < 32) { + if (RetTy->isIntegerTy() && TD.getTypeAllocSizeInBits(RetTy) < 32) { // The following zero-extension is for integer types only, and // specifically not for aggregates. 
TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal); @@ -3291,14 +3292,14 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_ldu_global_i: case Intrinsic::nvvm_ldu_global_f: case Intrinsic::nvvm_ldu_global_p: { - + auto &DL = I.getModule()->getDataLayout(); Info.opc = ISD::INTRINSIC_W_CHAIN; if (Intrinsic == Intrinsic::nvvm_ldu_global_i) - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) - Info.memVT = getPointerTy(); + Info.memVT = getPointerTy(DL); else - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.vol = 0; @@ -3311,14 +3312,15 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: { + auto &DL = I.getModule()->getDataLayout(); Info.opc = ISD::INTRINSIC_W_CHAIN; if (Intrinsic == Intrinsic::nvvm_ldg_global_i) - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) - Info.memVT = getPointerTy(); + Info.memVT = getPointerTy(DL); else - Info.memVT = getValueType(I.getType()); + Info.memVT = getValueType(DL, I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.vol = 0; @@ -3731,8 +3733,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( /// Used to guide target specific optimizations, like loop strength reduction /// (LoopStrengthReduce.cpp) and memory optimization for address mode /// (CodeGenPrepare.cpp) -bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // AddrMode - This represents an addressing mode of: @@ -3772,7 +3774,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. NVPTXTargetLowering::ConstraintType -NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { +NVPTXTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: @@ -3794,7 +3796,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { std::pair<unsigned, const TargetRegisterClass *> NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { @@ -4251,7 +4253,6 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, - const DataLayout *TD, SmallVectorImpl<SDValue> &Results) { EVT ResVT = N->getValueType(0); SDLoc DL(N); @@ -4282,8 +4283,9 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, LoadSDNode *LD = cast<LoadSDNode>(N); unsigned Align = LD->getAlignment(); + auto &TD = DAG.getDataLayout(); unsigned PrefAlign = - TD->getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); + TD.getPrefTypeAlignment(ResVT.getTypeForEVT(*DAG.getContext())); if (Align < PrefAlign) { // This load is not sufficiently aligned, so bail out and let this vector // load be scalarized. 
Note that we may still be able to emit smaller @@ -4495,7 +4497,7 @@ void NVPTXTargetLowering::ReplaceNodeResults( default: report_fatal_error("Unhandled custom legalization"); case ISD::LOAD: - ReplaceLoadVector(N, DAG, getDataLayout(), Results); + ReplaceLoadVector(N, DAG, Results); return; case ISD::INTRINSIC_W_CHAIN: ReplaceINTRINSIC_W_CHAIN(N, DAG, Results); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index ed94775b3002..e5c37321a33b 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -456,24 +456,23 @@ public: /// Used to guide target specific optimizations, like loop strength /// reduction (LoopStrengthReduce.cpp) and memory optimization for /// address mode (CodeGenPrepare.cpp) - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; - EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override { + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, + EVT VT) const override { if (VT.isVector()) return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); return MVT::i1; } - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; SDValue LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -483,7 +482,7 @@ public: SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; - std::string getPrototype(Type *, const ArgListTy &, + std::string getPrototype(const DataLayout &DL, Type *, const ArgListTy &, const SmallVectorImpl<ISD::OutputArg> &, unsigned retAlignment, const ImmutableCallSite *CS) const; @@ -501,7 +500,9 @@ public: const NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 6ab0fadf9a35..0bf72febc4a0 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -57,7 +57,6 @@ char NVPTXLowerAggrCopies::ID = 0; // Lower MemTransferInst or load-store pair to loop static void convertTransferToLoop( Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, - //unsigned numLoads, bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) { Type *indType = len->getType(); @@ -84,6 +83,8 @@ static void convertTransferToLoop( ind->addIncoming(ConstantInt::get(indType, 0), origBB); // load from srcAddr+ind + // TODO: we can leverage the align parameter of llvm.memcpy for more efficient + // word-sized loads and stores. 
Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), srcVolatile); // store at dstAddr+ind @@ -137,13 +138,10 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // // Collect all the aggrLoads, aggrMemcpys and addrMemsets. // - //const BasicBlock *firstBB = &F.front(); // first BB in F for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { - //BasicBlock *bb = BI; for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; ++II) { if (LoadInst *load = dyn_cast<LoadInst>(II)) { - if (!load->hasOneUse()) continue; @@ -152,7 +150,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { User *use = load->user_back(); if (StoreInst *store = dyn_cast<StoreInst>(use)) { - if (store->getOperand(0) != load) //getValueOperand + if (store->getOperand(0) != load) continue; aggrLoads.push_back(load); } @@ -188,8 +186,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // // Do the transformation of an aggr load/copy/set to a loop // - for (unsigned i = 0, e = aggrLoads.size(); i != e; ++i) { - LoadInst *load = aggrLoads[i]; + for (LoadInst *load : aggrLoads) { StoreInst *store = dyn_cast<StoreInst>(*load->user_begin()); Value *srcAddr = load->getOperand(0); Value *dstAddr = store->getOperand(1); @@ -203,20 +200,19 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { load->eraseFromParent(); } - for (unsigned i = 0, e = aggrMemcpys.size(); i != e; ++i) { - MemTransferInst *cpy = aggrMemcpys[i]; - Value *len = cpy->getLength(); - // llvm 2.7 version of memcpy does not have volatile - // operand yet. So always making it non-volatile - // optimistically, so that we don't see unnecessary - // st.volatile in ptx - convertTransferToLoop(cpy, cpy->getSource(), cpy->getDest(), len, false, - false, Context, F); + for (MemTransferInst *cpy : aggrMemcpys) { + convertTransferToLoop(/* splitAt */ cpy, + /* srcAddr */ cpy->getSource(), + /* dstAddr */ cpy->getDest(), + /* len */ cpy->getLength(), + /* srcVolatile */ cpy->isVolatile(), + /* dstVolatile */ cpy->isVolatile(), + /* Context */ Context, + /* Function F */ F); cpy->eraseFromParent(); } - for (unsigned i = 0, e = aggrMemsets.size(); i != e; ++i) { - MemSetInst *memsetinst = aggrMemsets[i]; + for (MemSetInst *memsetinst : aggrMemsets) { Value *len = memsetinst->getLength(); Value *val = memsetinst->getValue(); convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context, diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp index e83f735a551e..5a83371b07f1 100644 --- a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp +++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -2,7 +2,7 @@ // // The LLVM Compiler Infrastructure // -// This file is distributed under the University of Illinois Open Source +// This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// @@ -115,7 +115,7 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { replaceImageHandle(Handle, MF); - return true; + return true; } return false; diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 71645dca69c5..bd2509a3c8c9 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -48,7 +48,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU, const NVPTXTargetMachine &TM) : NVPTXGenSubtargetInfo(TT, CPU, FS), PTXVersion(0), SmVersion(20), TM(TM), InstrInfo(), TLInfo(TM, initializeSubtargetDependencies(CPU, FS)), - TSInfo(TM.getDataLayout()), FrameLowering() {} + FrameLowering() {} bool NVPTXSubtarget::hasImageHandles() const { // Enable handles for Kepler+, where CUDA supports indirect surfaces and diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 9d9072efc382..248f9e117d83 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -148,8 +148,9 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); }); + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(NVPTXTTIImpl(this, F)); + }); } void NVPTXPassConfig::addIRPasses() { diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index dc81802f4b5a..e7250cdba5ac 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -94,7 +94,7 @@ unsigned NVPTXTTIImpl::getArithmeticInstrCost( TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -117,3 +117,15 @@ unsigned NVPTXTTIImpl::getArithmeticInstrCost( Opd1PropInfo, Opd2PropInfo); } } + +void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + BaseT::getUnrollingPreferences(L, UP); + + // Enable partial unrolling and runtime unrolling, but reduce the + // threshold. This partially unrolls small loops which are often + // unrolled by the PTX to SASS compiler and unrolling earlier can be + // beneficial. + UP.Partial = UP.Runtime = true; + UP.PartialThreshold = UP.Threshold / 4; +} diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 4280888988f9..5bcd1e27a558 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -37,8 +37,9 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> { const NVPTXTargetLowering *getTLI() const { return TLI; }; public: - explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM) - : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} + explicit NVPTXTTIImpl(const NVPTXTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. 
NVPTXTTIImpl(const NVPTXTTIImpl &Arg) @@ -46,18 +47,6 @@ public: NVPTXTTIImpl(NVPTXTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - NVPTXTTIImpl &operator=(const NVPTXTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - NVPTXTTIImpl &operator=(NVPTXTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } bool hasBranchDivergence() { return true; } @@ -69,6 +58,8 @@ public: TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); + + void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); }; } // end namespace llvm diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index fe168a547597..c0c83cc258b8 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -33,7 +33,6 @@ add_llvm_target(PowerPCCodeGen PPCTargetObjectFile.cpp PPCTargetTransformInfo.cpp PPCTOCRegDeps.cpp - PPCSelectionDAGInfo.cpp PPCTLSDynamicCall.cpp PPCVSXCopy.cpp PPCVSXFMAMutate.cpp diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 5c38fe173d96..30f232a9a91e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -51,10 +51,9 @@ static MCInstrInfo *createPPCMCInstrInfo() { return X; } -static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) { - Triple TheTriple(TT); - bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 || - TheTriple.getArch() == Triple::ppc64le); +static MCRegisterInfo *createPPCMCRegisterInfo(const Triple &TT) { + bool isPPC64 = + (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); unsigned Flavour = isPPC64 ? 0 : 1; unsigned RA = isPPC64 ? 
PPC::LR8 : PPC::LR; @@ -65,9 +64,7 @@ static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) { static MCSubtargetInfo *createPPCMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitPPCMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createPPCMCSubtargetInfoImpl(TT, CPU, FS); } static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, @@ -90,22 +87,20 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createPPCMCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); if (RM == Reloc::Default) { - Triple T(TT); - if (T.isOSDarwin()) + if (TT.isOSDarwin()) RM = Reloc::DynamicNoPIC; else RM = Reloc::Static; } if (CM == CodeModel::Default) { - Triple T(TT); - if (!T.isOSDarwin() && - (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le)) + if (!TT.isOSDarwin() && + (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le)) CM = CodeModel::Medium; } X->initMCCodeGenInfo(RM, CM, OL); @@ -231,7 +226,7 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S, static MCTargetStreamer * createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { const Triple &TT = STI.getTargetTriple(); - if (TT.getObjectFormat() == Triple::ELF) + if (TT.isOSBinFormatELF()) return new PPCTargetELFStreamer(S); return new PPCTargetMachOStreamer(S); } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 87a5236e711f..199a0debf88b 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -197,7 +197,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // External or weakly linked global variables need non-lazily-resolved stubs if (TM.getRelocationModel() != Reloc::Static && - (GV->isDeclaration() || GV->isWeakForLinker())) { + !GV->isStrongDefinitionForLinker()) { if (!GV->hasHiddenVisibility()) { SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = @@ -369,28 +369,70 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && "High 16 bits of call target should be zero."); unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); - EncodedBytes = 6*4; + EncodedBytes = 0; // Materialize the jump address: EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8) .addReg(ScratchReg) .addImm((CallTarget >> 32) & 0xFFFF)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(32).addImm(16)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8) .addReg(ScratchReg) .addReg(ScratchReg) .addImm((CallTarget >> 16) & 0xFFFF)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(CallTarget & 0xFFFF)); + // Save the current TOC pointer before the remote call. + int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40; + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD) + .addReg(PPC::X2) + .addImm(TOCSaveOffset) + .addReg(PPC::X1)); + ++EncodedBytes; + + + // If we're on ELFv1, then we need to load the actual function pointer from + // the function descriptor. 
+ if (!Subtarget->isELFv2ABI()) { + // Load the new TOC pointer and the function address, but not r11 + // (needing this is rare, and loading it here would prevent passing it + // via a 'nest' parameter. + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + .addReg(PPC::X2) + .addImm(8) + .addReg(ScratchReg)); + ++EncodedBytes; + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + .addReg(ScratchReg) + .addImm(0) + .addReg(ScratchReg)); + ++EncodedBytes; + } + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8).addReg(ScratchReg)); + ++EncodedBytes; EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8)); + ++EncodedBytes; + + // Restore the TOC pointer after the call. + EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + .addReg(PPC::X2) + .addImm(TOCSaveOffset) + .addReg(PPC::X1)); + ++EncodedBytes; } + // Each instruction is 4 bytes. + EncodedBytes *= 4; + // Emit padding. unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); assert(NumBytes >= EncodedBytes && @@ -624,7 +666,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { IsExternal = GV->isDeclaration(); IsCommon = GV->hasCommonLinkage(); IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && - (GV->isDeclaration() || GV->isWeakForLinker()); + !GV->isStrongDefinitionForLinker(); IsAvailExt = GV->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); @@ -706,7 +748,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = getSymbol(GV); IsExternal = GV->isDeclaration(); IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && - (GV->isDeclaration() || GV->isWeakForLinker()); + !GV->isStrongDefinitionForLinker(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 416131745806..baadf081a64c 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -351,8 +351,9 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { Opcode = ISD::FTRUNC; break; } - MVT VTy = - TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true); + auto &DL = CI->getModule()->getDataLayout(); + MVT VTy = TLI->getSimpleValueType(DL, CI->getArgOperand(0)->getType(), + true); if (VTy == MVT::Other) return true; diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index 874a6fce0656..5bc9124f8085 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -133,6 +133,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // register having an odd register number. CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, + // The 'nest' parameter, if any, is passed in R11. + CCIfNest<CCAssignToReg<[R11]>>, + // The first 8 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index fafcd76f9d18..5f236f744fc4 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -262,7 +262,7 @@ static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) { // fast-isel, and return its equivalent machine type in VT. // FIXME: Copied directly from ARM -- factor into base class? 
bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT Evt = TLI.getValueType(Ty, true); + EVT Evt = TLI.getValueType(DL, Ty, true); // Only handle simple types. if (Evt == MVT::Other || !Evt.isSimple()) return false; @@ -324,12 +324,13 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { return PPCComputeAddress(U->getOperand(0), Addr); case Instruction::IntToPtr: // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return PPCComputeAddress(U->getOperand(0), Addr); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return PPCComputeAddress(U->getOperand(0), Addr); break; case Instruction::GetElementPtr: { @@ -799,7 +800,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, bool IsZExt, unsigned DestReg) { Type *Ty = SrcValue1->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); + EVT SrcEVT = TLI.getValueType(DL, Ty, true); if (!SrcEVT.isSimple()) return false; MVT SrcVT = SrcEVT.getSimpleVT(); @@ -893,8 +894,8 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, // Attempt to fast-select a floating-point extend instruction. bool PPCFastISel::SelectFPExt(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f32 || DestVT != MVT::f64) return false; @@ -911,8 +912,8 @@ bool PPCFastISel::SelectFPExt(const Instruction *I) { // Attempt to fast-select a floating-point truncate instruction. bool PPCFastISel::SelectFPTrunc(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::f64 || DestVT != MVT::f32) return false; @@ -992,7 +993,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { return false; Value *Src = I->getOperand(0); - EVT SrcEVT = TLI.getValueType(Src->getType(), true); + EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true); if (!SrcEVT.isSimple()) return false; @@ -1157,7 +1158,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { // Attempt to fast-select a binary integer operation that isn't already // handled automatically. bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); // We can get here in the case when we have a binary operation on a non-legal // type and the target independent selector doesn't know how to handle it. @@ -1594,7 +1595,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ValLocs; @@ -1641,7 +1642,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) { RetRegs.push_back(VA.getLocReg()); unsigned SrcReg = Reg + VA.getValNo(); - EVT RVEVT = TLI.getValueType(RV->getType()); + EVT RVEVT = TLI.getValueType(DL, RV->getType()); if (!RVEVT.isSimple()) return false; MVT RVVT = RVEVT.getSimpleVT(); @@ -1769,8 +1770,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { // Attempt to fast-select an integer truncate instruction. bool PPCFastISel::SelectTrunc(const Instruction *I) { Value *Src = I->getOperand(0); - EVT SrcVT = TLI.getValueType(Src->getType(), true); - EVT DestVT = TLI.getValueType(I->getType(), true); + EVT SrcVT = TLI.getValueType(DL, Src->getType(), true); + EVT DestVT = TLI.getValueType(DL, I->getType(), true); if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16) return false; @@ -1806,8 +1807,8 @@ bool PPCFastISel::SelectIntExt(const Instruction *I) { if (!SrcReg) return false; EVT SrcEVT, DestEVT; - SrcEVT = TLI.getValueType(SrcTy, true); - DestEVT = TLI.getValueType(DestTy, true); + SrcEVT = TLI.getValueType(DL, SrcTy, true); + DestEVT = TLI.getValueType(DL, DestTy, true); if (!SrcEVT.isSimple()) return false; if (!DestEVT.isSimple()) @@ -1979,7 +1980,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // on the "if" path here. if (CModel == CodeModel::Large || (GV->getType()->getElementType()->isFunctionTy() && - (GV->isDeclaration() || GV->isWeakForLinker())) || + !GV->isStrongDefinitionForLinker()) || GV->isDeclaration() || GV->hasCommonLinkage() || GV->hasAvailableExternallyLinkage()) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), @@ -2127,7 +2128,7 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT, // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. 
if (!CEVT.isSimple()) return 0; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index b4008e4a886a..87229d80d9c1 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -306,9 +306,10 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DebugLoc dl = MI->getDebugLoc(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UsedRegMask = 0; for (unsigned i = 0; i != 32; ++i) - if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i])) + if (MRI.isPhysRegModified(VRRegNo[i])) UsedRegMask |= 1 << (31-i); // Live in and live out values already must be in the mask, so don't bother @@ -1158,9 +1159,11 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, } } -void -PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *) const { +void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + const PPCRegisterInfo *RegInfo = static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo()); @@ -1168,8 +1171,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); unsigned LR = RegInfo->getRARegister(); FI->setMustSaveLR(MustSaveLR(MF, LR)); - MachineRegisterInfo &MRI = MF.getRegInfo(); - MRI.setPhysRegUnused(LR); + SavedRegs.reset(LR); // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); @@ -1214,9 +1216,9 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the // function uses CR 2, 3, or 4. if (!isPPC64 && !isDarwinABI && - (MRI.isPhysRegUsed(PPC::CR2) || - MRI.isPhysRegUsed(PPC::CR3) || - MRI.isPhysRegUsed(PPC::CR4))) { + (SavedRegs.test(PPC::CR2) || + SavedRegs.test(PPC::CR3) || + SavedRegs.test(PPC::CR4))) { int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 28d074ecd79d..d6a389bfbf0d 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -45,8 +45,8 @@ public: bool needsFP(const MachineFunction &MF) const; void replaceFPWithRealFP(MachineFunction &MF) const; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const override; void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index c85c2610d2f5..01a3acb742e6 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -102,7 +102,8 @@ namespace { /// getSmallIPtrImm - Return a target constant of pointer type. 
inline SDValue getSmallIPtrImm(unsigned Imm, SDLoc dl) { - return CurDAG->getTargetConstant(Imm, dl, PPCLowering->getPointerTy()); + return CurDAG->getTargetConstant( + Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout())); } /// isRotateAndMask - Returns true if Mask and Shift can be folded into a @@ -313,7 +314,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { const Module *M = MF->getFunction()->getParent(); DebugLoc dl; - if (PPCLowering->getPointerTy() == MVT::i32) { + if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) { if (PPCSubTarget->isTargetELF()) { GlobalBaseReg = PPC::R30; if (M->getPICLevel() == PICLevel::Small) { @@ -342,7 +343,8 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { } } return CurDAG->getRegister(GlobalBaseReg, - PPCLowering->getPointerTy()).getNode(); + PPCLowering->getPointerTy(CurDAG->getDataLayout())) + .getNode(); } /// isIntS16Immediate - This method tests to see if the node is either a 32-bit @@ -2205,7 +2207,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDLoc dl(N); unsigned Imm; ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); - EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = + CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); if (!PPCSubTarget->useCRBits() && @@ -2468,10 +2471,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; - return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - PPCLowering->getPointerTy(), - MVT::Other, Ops)); + return transferMemOperands( + N, CurDAG->getMachineNode( + Opcode, dl, LD->getValueType(0), + PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, + Ops)); } else { unsigned Opcode; bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; @@ -2506,10 +2510,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; - return transferMemOperands(N, CurDAG->getMachineNode(Opcode, dl, - LD->getValueType(0), - PPCLowering->getPointerTy(), - MVT::Other, Ops)); + return transferMemOperands( + N, CurDAG->getMachineNode( + Opcode, dl, LD->getValueType(0), + PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, + Ops)); } } @@ -2662,7 +2667,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } case ISD::SELECT_CC: { ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); - EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = + CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); // If this is a select of i1 operands, we'll pattern match it. @@ -2901,7 +2907,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { const GlobalValue *GValue = G->getGlobal(); if ((GValue->getType()->getElementType()->isFunctionTy() && - (GValue->isDeclaration() || GValue->isWeakForLinker())) || + !GValue->isStrongDefinitionForLinker()) || GValue->isDeclaration() || GValue->hasCommonLinkage() || GValue->hasAvailableExternallyLinkage()) return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, @@ -2915,7 +2921,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { // Generate a PIC-safe GOT reference. 
assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); - return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32); + return CurDAG->SelectNodeTo( + N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(CurDAG->getDataLayout()), + MVT::i32); } case PPCISD::VADD_SPLAT: { // This expands into one of three sequences, depending on whether @@ -3398,9 +3406,8 @@ void PPCDAGToDAGISel::PeepholeCROps() { bool IsModified; do { IsModified = false; - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I); + for (SDNode &Node : CurDAG->allnodes()) { + MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); if (!MachineNode || MachineNode->use_empty()) continue; SDNode *ResNode = MachineNode; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 594472bbb47b..0ed9b051ffed 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -952,7 +952,8 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. -unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { +unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const { // Darwin passes everything on 4 byte boundary. if (Subtarget.isDarwin()) return 4; @@ -1055,7 +1056,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } -EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const { +EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, + EVT VT) const { if (!VT.isVector()) return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; @@ -1101,7 +1103,7 @@ static bool isConstantOrUndef(int Op, int Val) { /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -1132,7 +1134,7 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -1174,7 +1176,7 @@ bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, if (!Subtarget.hasP8Vector()) return false; - bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; @@ -1237,7 +1239,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, /// the input operands are swapped (see PPCInstrAltivec.td). 
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 2) // swapped @@ -1262,7 +1264,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 2) // swapped @@ -1352,7 +1354,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, */ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG) { - if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (DAG.getDataLayout().isLittleEndian()) { unsigned indexOffset = CheckEven ? 4 : 0; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); @@ -1399,7 +1401,7 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, if (ShiftAmt < i) return -1; ShiftAmt -= i; - bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian(); + bool isLE = DAG.getDataLayout().isLittleEndian(); if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { // Check the rest of the elements to see if they are consecutive. @@ -1456,7 +1458,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); assert(isSplatShuffleMask(SVOp, EltSize)); - if (DAG.getTarget().getDataLayout()->isLittleEndian()) + if (DAG.getDataLayout().isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else return SVOp->getMaskElt(0) / EltSize; @@ -1796,7 +1798,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, } } - Disp = DAG.getTargetConstant(0, dl, getPointerTy()); + Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); @@ -2084,7 +2086,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool is64bit = Subtarget.isPPC64(); const Module *M = DAG.getMachineFunction().getFunction()->getParent(); PICLevel::Level picLevel = M->getPICLevel(); @@ -2270,7 +2272,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); @@ -2399,11 +2401,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl(Op); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = 
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); - Type *IntPtrTy = - DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( - *DAG.getContext()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -2440,7 +2440,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), @@ -2476,8 +2476,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); - - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT); @@ -2797,7 +2796,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -3023,7 +3022,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); @@ -3059,12 +3058,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = Num_FPR_Regs; unsigned AvailableVRs = Num_VR_Regs; - for (unsigned i = 0, e = Ins.size(); i != e; ++i) + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + if (Ins[i].Flags.isNest()) + continue; + if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs, Subtarget.hasQPX())) HasParameterArea = true; + } // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, @@ -3216,6 +3219,17 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::i1: case MVT::i32: case MVT::i64: + if (Flags.isNest()) { + // The 'nest' parameter, if any, is passed in R11. 
+ unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); + + if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) + ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); + + break; + } + // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. @@ -3425,7 +3439,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && @@ -3845,7 +3859,8 @@ static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { return nullptr; // Top 6 bits have to be sext of immediate. return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op), - DAG.getTargetLoweringInfo().getPointerTy()).getNode(); + DAG.getTargetLoweringInfo().getPointerTy( + DAG.getDataLayout())).getNode(); } namespace { @@ -3991,7 +4006,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, bool isVector, SmallVectorImpl<SDValue> &MemOpChains, SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, SDLoc dl) { - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); if (!isTailCall) { if (isVector) { SDValue StackPtr; @@ -4053,7 +4068,7 @@ static bool isFunctionGlobalAddress(SDValue Callee) { static unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff, - bool isTailCall, bool IsPatchPoint, + bool isTailCall, bool IsPatchPoint, bool hasNest, SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, ImmutableCallSite *CS, const PPCSubtarget &Subtarget) { @@ -4062,7 +4077,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, bool isSVR4ABI = Subtarget.isSVR4ABI(); bool isELFv2ABI = Subtarget.isELFv2ABI(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. @@ -4084,8 +4099,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if ((DAG.getTarget().getRelocationModel() != Reloc::Static && (Subtarget.getTargetTriple().isMacOSX() && Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && - (G->getGlobal()->isDeclaration() || - G->getGlobal()->isWeakForLinker())) || + !G->getGlobal()->isStrongDefinitionForLinker()) || (Subtarget.isTargetELF() && !isPPC64 && !G->getGlobal()->hasLocalLinkage() && DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { @@ -4196,11 +4210,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, Chain = TOCVal.getValue(0); InFlag = TOCVal.getValue(1); - SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, - InFlag); + // If the function call has an explicit 'nest' parameter, it takes the + // place of the environment pointer. 
+ if (!hasNest) { + SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, + InFlag); - Chain = EnvVal.getValue(0); - InFlag = EnvVal.getValue(1); + Chain = EnvVal.getValue(0); + InFlag = EnvVal.getValue(1); + } MTCTROps[0] = Chain; MTCTROps[1] = LoadFuncPtr; @@ -4218,7 +4236,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, CallOpc = PPCISD::BCTRL; Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) - if (isSVR4ABI && isPPC64 && !isELFv2ABI) + if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) @@ -4254,8 +4272,7 @@ static bool isLocalCall(const SDValue &Callee) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - return !G->getGlobal()->isDeclaration() && - !G->getGlobal()->isWeakForLinker(); + return G->getGlobal()->isStrongDefinitionForLinker(); return false; } @@ -4308,7 +4325,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SDValue PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, bool isVarArg, bool IsPatchPoint, - SelectionDAG &DAG, + bool hasNest, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag, SDValue Chain, @@ -4321,8 +4338,8 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl, - SPDiff, isTailCall, IsPatchPoint, RegsToPass, - Ops, NodeTys, CS, Subtarget); + SPDiff, isTailCall, IsPatchPoint, hasNest, + RegsToPass, Ops, NodeTys, CS, Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) @@ -4381,7 +4398,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // allocated and an unnecessary move instruction being generated. CallOpc = PPCISD::BCTRL_LOAD_TOC; - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); @@ -4586,7 +4603,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, unsigned LocMemOffset = ByValVA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), + StackPtr, PtrOff); // Create a copy of the argument in the local area of the current // stack frame. 
@@ -4623,7 +4641,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, if (!isTailCall) { SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), + StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), @@ -4664,7 +4683,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, false, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } @@ -4703,8 +4723,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); + bool hasNest = false; - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); unsigned PtrByteSize = 8; MachineFunction &MF = DAG.getMachineFunction(); @@ -4758,6 +4779,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; + if (Flags.isNest()) + continue; + if (CallConv == CallingConv::Fast) { if (Flags.isByVal()) NumGPRsUsed += (Flags.getByValSize()+7)/8; @@ -5021,6 +5045,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, case MVT::i1: case MVT::i32: case MVT::i64: + if (Flags.isNest()) { + // The 'nest' parameter, if any, is passed in R11. + RegsToPass.push_back(std::make_pair(PPC::X11, Arg)); + hasNest = true; + break; + } + // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. @@ -5302,9 +5333,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, - RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, - NumBytes, Ins, InVals, CS); + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart, + Callee, SPDiff, NumBytes, Ins, InVals, CS); } SDValue @@ -5320,7 +5351,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, unsigned NumOps = Outs.size(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; unsigned PtrByteSize = isPPC64 ? 
8 : 4; @@ -5693,7 +5724,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, + /* unused except on PPC64 ELFv1 */ false, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CS); } @@ -5764,7 +5796,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); // Get the corect type for pointers. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); // Construct the stack pointer operand. bool isPPC64 = Subtarget.isPPC64(); @@ -5794,7 +5826,7 @@ SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. @@ -5817,7 +5849,7 @@ SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. @@ -5845,7 +5877,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDLoc dl(Op); // Get the corect type for pointers. - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); // Negate the size. 
SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, DAG.getConstant(0, dl, PtrVT), Size); @@ -5888,8 +5920,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue BasePtr = LD->getBasePtr(); MachineMemOperand *MMO = LD->getMemOperand(); - SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain, - BasePtr, MVT::i8, MMO); + SDValue NewLD = + DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, + BasePtr, MVT::i8, MMO); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; @@ -5913,7 +5946,8 @@ SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Value = ST->getValue(); MachineMemOperand *MMO = ST->getMemOperand(); - Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value); + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), + Value); return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); } @@ -6374,7 +6408,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SINT.getOpcode() == ISD::ZERO_EXTEND)) && SINT.getOperand(0).getValueType() == MVT::i32) { MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -6419,7 +6453,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // then lfd it and fcfid it. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); SDValue Ld; if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { @@ -6506,7 +6540,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); // Save FP Control Word to register EVT NodeTys[] = { @@ -6727,7 +6761,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); assert(BVN->getNumOperands() == 4 && @@ -6760,9 +6794,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, } Constant *CP = ConstantVector::get(CV); - SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(), - 16 /* alignment */); - + SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()), + 16 /* alignment */); + SmallVector<SDValue, 2> Ops; Ops.push_back(DAG.getEntryNode()); Ops.push_back(CPIdx); @@ -7453,7 +7487,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. 
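The recurring getPointerTy() to getPointerTy(DAG.getDataLayout()) edits in the hunks above all follow one pattern: the pointer type is now derived from the module's DataLayout instead of state cached on TargetLowering. A hedged sketch of the new call shape; the wrapper name and includes are assumptions, only the getPointerTy call itself reflects the patch:

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"

// Hypothetical convenience wrapper around the DataLayout-aware overload.
static llvm::MVT getPtrVT(const llvm::TargetLowering &TLI,
                          llvm::SelectionDAG &DAG, unsigned AddrSpace = 0) {
  return TLI.getPointerTy(DAG.getDataLayout(), AddrSpace);
}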
@@ -7499,7 +7533,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue StoreChain = DAG.getEntryNode(); @@ -7651,9 +7685,9 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < 4; ++Idx) { - SDValue Ex = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, - DAG.getConstant(Idx, dl, getVectorIdxTy())); + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value, + DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout()))); SDValue Store; if (ScalarVT != ScalarMemVT) Store = @@ -7715,7 +7749,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SmallVector<SDValue, 2> Ops; @@ -7920,7 +7954,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, assert(N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"); - EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); + EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), N->getOperand(1)); @@ -8248,7 +8283,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, unsigned mainDstReg = MRI.createVirtualRegister(RC); unsigned restoreDstReg = MRI.createVirtualRegister(RC); - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate @@ -8386,7 +8421,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); @@ -9032,6 +9067,19 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Target Optimization Hooks //===----------------------------------------------------------------------===// +static std::string getRecipOp(const char *Base, EVT VT) { + std::string RecipOp(Base); + if (VT.getScalarType() == MVT::f64) + RecipOp += "d"; + else + RecipOp += "f"; + + if (VT.isVector()) + RecipOp = "vec-" + RecipOp; + + return RecipOp; +} + SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, @@ -9043,13 +9091,12 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - // Convergence is quadratic, so we essentially double the number of digits - // correct after every iteration. For both FRE and FRSQRTE, the minimum - // architected relative accuracy is 2^-5. 
When hasRecipPrec(), this is - // 2^-14. IEEE float has 23 digits and double has 52 digits. - RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; - if (VT.getScalarType() == MVT::f64) - ++RefinementSteps; + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + std::string RecipOp = getRecipOp("sqrt", VT); + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); UseOneConstNR = true; return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } @@ -9066,13 +9113,12 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - // Convergence is quadratic, so we essentially double the number of digits - // correct after every iteration. For both FRE and FRSQRTE, the minimum - // architected relative accuracy is 2^-5. When hasRecipPrec(), this is - // 2^-14. IEEE float has 23 digits and double has 52 digits. - RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; - if (VT.getScalarType() == MVT::f64) - ++RefinementSteps; + TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals; + std::string RecipOp = getRecipOp("div", VT); + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); @@ -9854,7 +9900,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, assert(N->getOpcode() == ISD::SIGN_EXTEND && "Invalid extension type"); - EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0)); + EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); return DAG.getNode(ISD::SRA, dl, N->getValueType(0), @@ -10145,9 +10191,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, EVT MemVT = LD->getMemoryVT(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext()); - unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy); + unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy); if (LD->isUnindexed() && VT.isVector() && ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && // P8 and later hardware should just use LOAD. @@ -10219,7 +10265,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 2*MemVT.getStoreSize()-1); // Create the new base load. 
- SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy()); + SDValue LDXIntID = + DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue BaseLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, @@ -10243,7 +10290,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (!findConsecutiveLoad(LD, DAG)) --IncValue; - SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy()); + SDValue Increment = + DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); MachineMemOperand *ExtraMMO = @@ -10691,7 +10739,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { /// getConstraintType - Given a constraint, return the type of /// constraint it is for this target. PPCTargetLowering::ConstraintType -PPCTargetLowering::getConstraintType(const std::string &Constraint) const { +PPCTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -10776,7 +10824,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight( std::pair<unsigned, const TargetRegisterClass *> PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters @@ -10923,8 +10971,8 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. -bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // PPC does not allow r+i addressing modes for vectors! if (Ty->isVectorTy() && AM.BaseOffs != 0) @@ -10977,22 +11025,22 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); FuncInfo->setLRStoreRequired(); bool isPPC64 = Subtarget.isPPC64(); + auto PtrVT = getPointerTy(MF.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, isPPC64 ? MVT::i64 : MVT::i32); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, dl, getPointerTy(), - FrameAddr, Offset), + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo(), false, false, false, 0); } // Just load the return address off the stack. 
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); - return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, MachinePointerInfo(), false, false, false, 0); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, + MachinePointerInfo(), false, false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, @@ -11000,13 +11048,13 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SDLoc dl(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - bool isPPC64 = PtrVT == MVT::i64; - MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); + bool isPPC64 = PtrVT == MVT::i64; + // Naked functions never have a frame pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned FrameReg; @@ -11026,8 +11074,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. -unsigned PPCTargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 02242b512a4f..6e13533cfdb3 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -423,7 +423,9 @@ namespace llvm { /// DAG node. const char *getTargetNodeName(unsigned Opcode) const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } bool isCheapToSpeculateCttz() const override { return true; @@ -434,7 +436,8 @@ namespace llvm { } /// getSetCCResultType - Return the ISD::SETCC ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; /// Return true if target always beneficiates from combining into FMA for a /// given value type. This must typically return false on targets where FMA @@ -487,7 +490,8 @@ namespace llvm { SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector<SDNode *> *Created) const override; - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, @@ -519,8 +523,7 @@ namespace llvm { MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const; - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. 
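Two API migrations run through the header changes above and the .cpp hunks: several TargetLowering hooks now take the DataLayout explicitly (getScalarShiftAmountTy, getSetCCResultType, isLegalAddressingMode), and the inline-asm constraint hooks take StringRef rather than const std::string&, so call sites no longer build a temporary string. A minimal sketch of the StringRef-based shape; the helper is illustrative and covers only the two codes visible in a later hunk of this header:

#include "llvm/ADT/StringRef.h"

// Illustrative check in the new StringRef style; the real hook maps each
// code to an InlineAsm constraint kind and handles more cases.
static bool isKnownMemConstraint(llvm::StringRef ConstraintCode) {
  return ConstraintCode == "es" || ConstraintCode == "o";
}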
@@ -529,13 +532,13 @@ namespace llvm { std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. - unsigned getByValTypeAlignment(Type *Ty) const override; + unsigned getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. @@ -544,8 +547,8 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "es") return InlineAsm::Constraint_es; else if (ConstraintCode == "o") @@ -561,8 +564,8 @@ namespace llvm { /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can @@ -745,7 +748,7 @@ namespace llvm { SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall, - bool isVarArg, bool IsPatchPoint, + bool isVarArg, bool IsPatchPoint, bool hasNest, SelectionDAG &DAG, SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 696a83860e53..bf6e40296405 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -57,6 +57,10 @@ static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden); +static cl::opt<bool> +UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, + cl::desc("Use the old (incorrect) instruction latency calculation")); + // Pin the vtable to this file. void PPCInstrInfo::anchor() {} @@ -103,6 +107,35 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return new ScoreboardHazardRecognizer(II, DAG); } +unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + if (!ItinData || UseOldLatencyCalc) + return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost); + + // The default implementation of getInstrLatency calls getStageLatency, but + // getStageLatency does not do the right thing for us. While we have + // itinerary, most cores are fully pipelined, and so the itineraries only + // express the first part of the pipeline, not every stage. Instead, we need + // to use the listed output operand cycle number (using operand 0 here, which + // is an output). 
+ + unsigned Latency = 1; + unsigned DefClass = MI->getDesc().getSchedClass(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) + continue; + + int Cycle = ItinData->getOperandCycle(DefClass, i); + if (Cycle < 0) + continue; + + Latency = std::max(Latency, (unsigned) Cycle); + } + + return Latency; +} int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index e2d6346aa532..40badae644d6 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -95,6 +95,10 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = nullptr) const override; + int getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 43ba4994fde6..20c95fe888e0 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -989,6 +989,18 @@ def : Pat<(int_ppc_vsx_xvdivsp v4f32:$A, v4f32:$B), def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B), (XVDIVDP $A, $B)>; +// Reciprocal estimate +def : Pat<(int_ppc_vsx_xvresp v4f32:$A), + (XVRESP $A)>; +def : Pat<(int_ppc_vsx_xvredp v2f64:$A), + (XVREDP $A)>; + +// Recip. square root estimate +def : Pat<(int_ppc_vsx_xvrsqrtesp v4f32:$A), + (XVRSQRTESP $A)>; +def : Pat<(int_ppc_vsx_xvrsqrtedp v2f64:$A), + (XVRSQRTEDP $A)>; + } // AddedComplexity } // HasVSX @@ -1013,6 +1025,9 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. v4i32:$XB)))]>; } // isCommutable + def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B), + (XXLEQV $A, $B)>; + def XXLORC : XX3Form<60, 170, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xxlorc $XT, $XA, $XB", IIC_VecGeneral, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 656376c641aa..2b09b2f625de 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -165,8 +165,7 @@ void PPCRegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const PPCFrameLowering *PPCFI = - static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering()); + const PPCFrameLowering *TFI = getFrameLowering(MF); // The ZERO register is not really a register, but the representation of r0 // when used in instructions that treat r0 as the constant 0. 
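The getInstrLatency override added above takes the largest listed def-operand cycle from the itinerary instead of summing stage latencies, because the P7/P8 itineraries only describe the front of the pipeline on fully pipelined cores. A small standalone sketch of that max-over-def-cycles idea; the function name and includes are assumptions, not part of the patch, and it assumes the defs occupy the first operand slots:

#include <algorithm>
#include "llvm/MC/MCInstrItineraries.h"

// Largest listed output-operand cycle for a scheduling class, defaulting
// to 1 when the itinerary carries no cycle information.
static unsigned maxDefOperandCycle(const llvm::InstrItineraryData &Itin,
                                   unsigned SchedClass, unsigned NumDefs) {
  unsigned Latency = 1;
  for (unsigned i = 0; i != NumDefs; ++i) {
    int Cycle = Itin.getOperandCycle(SchedClass, i);
    if (Cycle > 0)
      Latency = std::max(Latency, unsigned(Cycle));
  }
  return Latency;
}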
@@ -209,7 +208,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::X1); Reserved.set(PPC::X13); - if (PPCFI->needsFP(MF)) + if (TFI->needsFP(MF)) Reserved.set(PPC::X31); if (hasBasePointer(MF)) @@ -230,7 +229,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } - if (PPCFI->needsFP(MF)) + if (TFI->needsFP(MF)) Reserved.set(PPC::R31); if (hasBasePointer(MF)) { @@ -256,8 +255,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const PPCFrameLowering *TFI = getFrameLowering(MF); const unsigned DefaultSafety = 1; switch (RC->getID()) { @@ -341,7 +339,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { unsigned FrameSize = MFI->getStackSize(); // Get stack alignments. - unsigned TargetAlign = Subtarget.getFrameLowering()->getStackAlignment(); + const PPCFrameLowering *TFI = getFrameLowering(MF); + unsigned TargetAlign = TFI->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); assert((maxCallFrameSize & (MaxAlign-1)) == 0 && "Maximum call-frame size not sufficiently aligned"); @@ -864,8 +863,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + const PPCFrameLowering *TFI = getFrameLowering(MF); if (!TM.isPPC64()) return TFI->hasFP(MF) ? PPC::R31 : PPC::R1; @@ -908,10 +906,10 @@ bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const { } bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); + const PPCFrameLowering *TFI = getFrameLowering(MF); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = Subtarget.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttribute(Attribute::StackAlignment)); @@ -946,11 +944,8 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); - const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); - const PPCFrameLowering *PPCFI = - static_cast<const PPCFrameLowering *>(Subtarget.getFrameLowering()); - unsigned StackEst = - PPCFI->determineFrameLayout(MF, false, true); + const PPCFrameLowering *TFI = getFrameLowering(MF); + unsigned StackEst = TFI->determineFrameLayout(MF, false, true); // If we likely don't need a stack frame, then we probably don't need a // virtual base register either. 
@@ -1034,4 +1029,3 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, MI->getOpcode() == TargetOpcode::PATCHPOINT || (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0)); } - diff --git a/lib/Target/PowerPC/PPCScheduleP7.td b/lib/Target/PowerPC/PPCScheduleP7.td index 635d154d10bf..267f56726180 100644 --- a/lib/Target/PowerPC/PPCScheduleP7.td +++ b/lib/Target/PowerPC/PPCScheduleP7.td @@ -315,6 +315,10 @@ def P7Itineraries : ProcessorItineraries< P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_VS1, P7_VS2]>], [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_VS1, P7_VS2]>], diff --git a/lib/Target/PowerPC/PPCScheduleP8.td b/lib/Target/PowerPC/PPCScheduleP8.td index 020739baec3a..69e6d05c6604 100644 --- a/lib/Target/PowerPC/PPCScheduleP8.td +++ b/lib/Target/PowerPC/PPCScheduleP8.td @@ -323,6 +323,10 @@ def P8Itineraries : ProcessorItineraries< P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FPU1, P8_FPU2]>], [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FPU1, P8_FPU2]>], + [5, 1, 1]>, InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FPU1, P8_FPU2]>], diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp deleted file mode 100644 index dc1674214769..000000000000 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the PPCSelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#include "PPCTargetMachine.h" -using namespace llvm; - -#define DEBUG_TYPE "powerpc-selectiondag-info" - -PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL) - : TargetSelectionDAGInfo(DL) {} - -PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {} diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h deleted file mode 100644 index 2c1378d5670d..000000000000 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the PowerPC subclass for TargetSelectionDAGInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H -#define LLVM_LIB_TARGET_POWERPC_PPCSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class PPCTargetMachine; - -class PPCSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit PPCSelectionDAGInfo(const DataLayout *DL); - ~PPCSelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index cf603fe17723..58daccae90f2 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -53,7 +53,7 @@ PPCSubtarget::PPCSubtarget(const Triple &TT, const std::string &CPU, IsPPC64(TargetTriple.getArch() == Triple::ppc64 || TargetTriple.getArch() == Triple::ppc64le), TM(TM), FrameLowering(initializeSubtargetDependencies(CPU, FS)), - InstrInfo(*this), TLInfo(TM, *this), TSInfo(TM.getDataLayout()) {} + InstrInfo(*this), TLInfo(TM, *this) {} void PPCSubtarget::initializeEnvironment() { StackAlignment = 16; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index e9cc3d4bd5bc..0616c1f65604 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -17,10 +17,10 @@ #include "PPCFrameLowering.h" #include "PPCISelLowering.h" #include "PPCInstrInfo.h" -#include "PPCSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -129,7 +129,7 @@ protected: PPCFrameLowering FrameLowering; PPCInstrInfo InstrInfo; PPCTargetLowering TLInfo; - PPCSelectionDAGInfo TSInfo; + TargetSelectionDAGInfo TSInfo; public: /// This constructor initializes the data members to match that @@ -164,7 +164,7 @@ public: const PPCTargetLowering *getTargetLowering() const override { return &TLInfo; } - const PPCSelectionDAGInfo *getSelectionDAGInfo() const override { + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } const PPCRegisterInfo *getRegisterInfo() const override { diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 074bc870751a..1daf244fed44 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -172,7 +172,26 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, computeFSAdditions(FS, OL, TT), Options, RM, CM, OL), TLOF(createTLOF(getTargetTriple())), - TargetABI(computeTargetABI(TT, Options)) { + TargetABI(computeTargetABI(TT, Options)), + Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) { + + // For the estimates, convergence is quadratic, so we essentially double the + // number of digits correct after every iteration. For both FRE and FRSQRTE, + // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), + // this is 2^-14. IEEE float has 23 digits and double has 52 digits. + unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3, + RefinementSteps64 = RefinementSteps + 1; + + this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps); + this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps); + this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps); + this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps); + + this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64); + this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64); + this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64); + this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64); + initAsmInfo(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 5c0f7e629a69..6496339519a1 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -29,6 +29,8 @@ public: private: std::unique_ptr<TargetLoweringObjectFile> TLOF; PPCABI TargetABI; + PPCSubtarget Subtarget; + mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap; public: diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 25d563a7d975..e21c2b77f4d7 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -317,7 +317,7 @@ unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index 35e7a1497c83..368bef93f0dd 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -38,7 +38,8 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> { public: explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. PPCTTIImpl(const PPCTTIImpl &Arg) @@ -46,18 +47,6 @@ public: PPCTTIImpl(PPCTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - PPCTTIImpl &operator=(const PPCTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - PPCTTIImpl &operator=(PPCTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index f352fa647ace..58d3c3d3fa2e 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -136,6 +136,16 @@ protected: // source of the copy, it must still be live here. We can't use // interval testing for a physical register, so as long as we're // walking the MIs we may as well test liveness here. + // + // FIXME: There is a case that occurs in practice, like this: + // %vreg9<def> = COPY %F1; VSSRC:%vreg9 + // ... 
+ // %vreg6<def> = COPY %vreg9; VSSRC:%vreg6,%vreg9 + // %vreg7<def> = COPY %vreg9; VSSRC:%vreg7,%vreg9 + // %vreg9<def,tied1> = XSMADDASP %vreg9<tied0>, %vreg1, %vreg4; VSSRC: + // %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg1, %vreg2; VSSRC: + // %vreg7<def,tied1> = XSMADDASP %vreg7<tied0>, %vreg1, %vreg3; VSSRC: + // which prevents an otherwise-profitable transformation. bool OtherUsers = false, KillsAddendSrc = false; for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); J != JE; --J) { diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index e7ab71ac2106..3fb1dcc3d4af 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -80,6 +80,7 @@ struct PPCVSXSwapEntry { unsigned int IsSwap : 1; unsigned int MentionsPhysVR : 1; unsigned int IsSwappable : 1; + unsigned int MentionsPartialVR : 1; unsigned int SpecialHandling : 3; unsigned int WebRejected : 1; unsigned int WillRemove : 1; @@ -91,7 +92,9 @@ enum SHValues { SH_INSERT, SH_NOSWAP_LD, SH_NOSWAP_ST, - SH_SPLAT + SH_SPLAT, + SH_XXPERMDI, + SH_COPYSCALAR }; struct PPCVSXSwapRemoval : public MachineFunctionPass { @@ -167,6 +170,21 @@ private: isRegInClass(Reg, &PPC::VRRCRegClass)); } + // Return true iff the given register is a partial vector register. + bool isScalarVecReg(unsigned Reg) { + return (isRegInClass(Reg, &PPC::VSFRCRegClass) || + isRegInClass(Reg, &PPC::VSSRCRegClass)); + } + + // Return true iff the given register mentions all or part of a + // vector register. Also sets Partial to true if the mention + // is for just the floating-point register overlap of the register. + bool isAnyVecReg(unsigned Reg, bool &Partial) { + if (isScalarVecReg(Reg)) + Partial = true; + return isScalarVecReg(Reg) || isVecReg(Reg); + } + public: // Main entry point for this pass. bool runOnMachineFunction(MachineFunction &MF) override { @@ -223,12 +241,13 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { for (MachineInstr &MI : MBB) { bool RelevantInstr = false; + bool Partial = false; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (isVecReg(Reg)) { + if (isAnyVecReg(Reg, Partial)) { RelevantInstr = true; break; } @@ -250,8 +269,13 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { // Unless noted otherwise, an instruction is considered // safe for the optimization. There are a large number of // such true-SIMD instructions (all vector math, logical, - // select, compare, etc.). - SwapVector[VecIdx].IsSwappable = 1; + // select, compare, etc.). However, if the instruction + // mentions a partial vector register and does not have + // special handling defined, it is not swappable. + if (Partial) + SwapVector[VecIdx].MentionsPartialVR = 1; + else + SwapVector[VecIdx].IsSwappable = 1; break; case PPC::XXPERMDI: { // This is a swap if it is of the form XXPERMDI t, s, s, 2. @@ -269,25 +293,37 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { VecIdx); if (trueReg1 == trueReg2) SwapVector[VecIdx].IsSwap = 1; - } + else { + // We can still handle these if the two registers are not + // identical, by adjusting the form of the XXPERMDI. + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI; + } // This is a doubleword splat if it is of the form // XXPERMDI t, s, s, 0 or XXPERMDI t, s, s, 3. As above we // must look through chains of copy-likes to find the source // register. 
We turn off the marking for mention of a physical // register, because splatting it is safe; the optimization - // will not swap the value in the physical register. - else if (immed == 0 || immed == 3) { + // will not swap the value in the physical register. Whether + // or not the two input registers are identical, we can handle + // these by adjusting the form of the XXPERMDI. + } else if (immed == 0 || immed == 3) { + + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI; + unsigned trueReg1 = lookThruCopyLike(MI.getOperand(1).getReg(), VecIdx); unsigned trueReg2 = lookThruCopyLike(MI.getOperand(2).getReg(), VecIdx); - if (trueReg1 == trueReg2) { - SwapVector[VecIdx].IsSwappable = 1; + if (trueReg1 == trueReg2) SwapVector[VecIdx].MentionsPhysVR = 0; - } + + } else { + // We can still handle these by adjusting the form of the XXPERMDI. + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_XXPERMDI; } - // Any other form of XXPERMDI is lane-sensitive and unsafe - // for the optimization. break; } case PPC::LVX: @@ -324,7 +360,32 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { if (isVecReg(MI.getOperand(0).getReg()) && isVecReg(MI.getOperand(1).getReg())) SwapVector[VecIdx].IsSwappable = 1; + // If we have a copy from one scalar floating-point register + // to another, we can accept this even if it is a physical + // register. The only way this gets involved is if it feeds + // a SUBREG_TO_REG, which is handled by introducing a swap. + else if (isScalarVecReg(MI.getOperand(0).getReg()) && + isScalarVecReg(MI.getOperand(1).getReg())) + SwapVector[VecIdx].IsSwappable = 1; + break; + case PPC::SUBREG_TO_REG: { + // These are fine provided they are moving between full vector + // register classes. If they are moving from a scalar + // floating-point class to a vector class, we can handle those + // as well, provided we introduce a swap. It is generally the + // case that we will introduce fewer swaps than we remove, but + // (FIXME) a cost model could be used. However, introduced + // swaps could potentially be CSEd, so this is not trivial. + if (isVecReg(MI.getOperand(0).getReg()) && + isVecReg(MI.getOperand(2).getReg())) + SwapVector[VecIdx].IsSwappable = 1; + else if (isVecReg(MI.getOperand(0).getReg()) && + isScalarVecReg(MI.getOperand(2).getReg())) { + SwapVector[VecIdx].IsSwappable = 1; + SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR; + } break; + } case PPC::VSPLTB: case PPC::VSPLTH: case PPC::VSPLTW: @@ -425,6 +486,10 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() { case PPC::VUPKLSW: case PPC::XXMRGHW: case PPC::XXMRGLW: + // XXSLDWI could be replaced by a general permute with one of three + // permute control vectors (for shift values 1, 2, 3). However, + // VPERM has a more restrictive register class. + case PPC::XXSLDWI: case PPC::XXSPLTW: break; } @@ -501,18 +566,20 @@ void PPCVSXSwapRemoval::formWebs() { DEBUG(MI->dump()); // It's sufficient to walk vector uses and join them to their unique - // definitions. In addition, check *all* vector register operands - // for physical regs. + // definitions. In addition, check full vector register operands + // for physical regs. We exclude partial-vector register operands + // because we can handle them if copied to a full vector. 
for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!isVecReg(Reg)) + if (!isVecReg(Reg) && !isScalarVecReg(Reg)) continue; if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - SwapVector[EntryIdx].MentionsPhysVR = 1; + if (!(MI->isCopy() && isScalarVecReg(Reg))) + SwapVector[EntryIdx].MentionsPhysVR = 1; continue; } @@ -545,15 +612,21 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { for (unsigned EntryIdx = 0; EntryIdx < SwapVector.size(); ++EntryIdx) { int Repr = EC->getLeaderValue(SwapVector[EntryIdx].VSEId); - // Reject webs containing mentions of physical registers, or containing - // operations that we don't know how to handle in a lane-permuted region. + // If representative is already rejected, don't waste further time. + if (SwapVector[Repr].WebRejected) + continue; + + // Reject webs containing mentions of physical or partial registers, or + // containing operations that we don't know how to handle in a lane- + // permuted region. if (SwapVector[EntryIdx].MentionsPhysVR || + SwapVector[EntryIdx].MentionsPartialVR || !(SwapVector[EntryIdx].IsSwappable || SwapVector[EntryIdx].IsSwap)) { SwapVector[Repr].WebRejected = 1; DEBUG(dbgs() << - format("Web %d rejected for physreg, subreg, or not swap[pable]\n", + format("Web %d rejected for physreg, partial reg, or not swap[pable]\n", Repr)); DEBUG(dbgs() << " in " << EntryIdx << ": "); DEBUG(SwapVector[EntryIdx].VSEMI->dump()); @@ -588,7 +661,7 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() { } } - // Reject webs than contain swapping stores that are fed by something + // Reject webs that contain swapping stores that are fed by something // other than a swap instruction. } else if (SwapVector[EntryIdx].IsStore && SwapVector[EntryIdx].IsSwap) { MachineInstr *MI = SwapVector[EntryIdx].VSEMI; @@ -670,7 +743,8 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() { // The identified swap entry requires special handling to allow its // containing computation to be optimized. Perform that handling // here. -// FIXME: This code is to be phased in with subsequent patches. +// FIXME: Additional opportunities will be phased in with subsequent +// patches. void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { switch (SwapVector[EntryIdx].SpecialHandling) { @@ -704,6 +778,91 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) { break; } + // For an XXPERMDI that isn't handled otherwise, we need to + // reverse the order of the operands. If the selector operand + // has a value of 0 or 3, we need to change it to 3 or 0, + // respectively. Otherwise we should leave it alone. (This + // is equivalent to reversing the two bits of the selector + // operand and complementing the result.) + case SHValues::SH_XXPERMDI: { + MachineInstr *MI = SwapVector[EntryIdx].VSEMI; + + DEBUG(dbgs() << "Changing XXPERMDI: "); + DEBUG(MI->dump()); + + unsigned Selector = MI->getOperand(3).getImm(); + if (Selector == 0 || Selector == 3) + Selector = 3 - Selector; + MI->getOperand(3).setImm(Selector); + + unsigned Reg1 = MI->getOperand(1).getReg(); + unsigned Reg2 = MI->getOperand(2).getReg(); + MI->getOperand(1).setReg(Reg2); + MI->getOperand(2).setReg(Reg1); + + DEBUG(dbgs() << " Into: "); + DEBUG(MI->dump()); + break; + } + + // For a copy from a scalar floating-point register to a vector + // register, removing swaps will leave the copied value in the + // wrong lane. Insert a swap following the copy to fix this. 
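The parenthetical above (reverse the two selector bits, then complement) can be checked directly against the SH_XXPERMDI code: 0 maps to 3, 3 maps to 0, and 1 and 2 map to themselves, which is exactly what the Selector = 3 - Selector guard implements once the operands have been swapped. A small illustrative check; the function name is made up:

// Not part of the patch: reverse the two selector bits and complement
// them modulo 4.
static unsigned reversedComplementedSelector(unsigned Sel) {
  unsigned Reversed = ((Sel & 1u) << 1) | ((Sel >> 1) & 1u);
  return ~Reversed & 3u;   // 0 <-> 3, while 1 and 2 are fixed points
}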
+ case SHValues::SH_COPYSCALAR: { + MachineInstr *MI = SwapVector[EntryIdx].VSEMI; + + DEBUG(dbgs() << "Changing SUBREG_TO_REG: "); + DEBUG(MI->dump()); + + unsigned DstReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + + MI->getOperand(0).setReg(NewVReg); + DEBUG(dbgs() << " Into: "); + DEBUG(MI->dump()); + + MachineBasicBlock::iterator InsertPoint = MI->getNextNode(); + + // Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG + // is copying to a VRRC, we need to be careful to avoid a register + // assignment problem. In this case we must copy from VRRC to VSRC + // prior to the swap, and from VSRC to VRRC following the swap. + // Coalescing will usually remove all this mess. + + if (DstRC == &PPC::VRRCRegClass) { + unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass); + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::COPY), VSRCTmp1) + .addReg(NewVReg); + DEBUG(MI->getNextNode()->dump()); + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::XXPERMDI), VSRCTmp2) + .addReg(VSRCTmp1) + .addReg(VSRCTmp1) + .addImm(2); + DEBUG(MI->getNextNode()->getNextNode()->dump()); + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::COPY), DstReg) + .addReg(VSRCTmp2); + DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump()); + + } else { + + BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(), + TII->get(PPC::XXPERMDI), DstReg) + .addReg(NewVReg) + .addReg(NewVReg) + .addImm(2); + + DEBUG(MI->getNextNode()->dump()); + } + break; + } } } @@ -756,6 +915,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() { DEBUG(dbgs() << "swap "); if (SwapVector[EntryIdx].MentionsPhysVR) DEBUG(dbgs() << "physreg "); + if (SwapVector[EntryIdx].MentionsPartialVR) + DEBUG(dbgs() << "partialreg "); if (SwapVector[EntryIdx].IsSwappable) { DEBUG(dbgs() << "swappable "); @@ -780,6 +941,12 @@ void PPCVSXSwapRemoval::dumpSwapVector() { case SH_SPLAT: DEBUG(dbgs() << "special:splat "); break; + case SH_XXPERMDI: + DEBUG(dbgs() << "special:xxpermdi "); + break; + case SH_COPYSCALAR: + DEBUG(dbgs() << "special:copyscalar "); + break; } } diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index c486411f9a1e..5b7bfdd28020 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -22,7 +22,6 @@ add_llvm_target(SparcCodeGen SparcRegisterInfo.cpp SparcSubtarget.cpp SparcTargetMachine.cpp - SparcSelectionDAGInfo.cpp SparcMCInstLower.cpp SparcTargetObjectFile.cpp ) diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index 91d2eeef0cc0..9113e4a46b96 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -57,7 +57,7 @@ static MCInstrInfo *createSparcMCInstrInfo() { return X; } -static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createSparcMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitSparcMCRegisterInfo(X, SP::O7); return X; @@ -65,11 +65,9 @@ static MCRegisterInfo *createSparcMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createSparcMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); if (CPU.empty()) CPU = (TT.getArch() == 
Triple::sparcv9) ? "v9" : "v8"; - InitSparcMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createSparcMCSubtargetInfoImpl(TT, CPU, FS); } // Code models. Some only make sense for 64-bit code. @@ -83,7 +81,8 @@ createSparcMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { // // All code models require that the text segment is smaller than 2GB. -static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createSparcMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); @@ -100,7 +99,8 @@ static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM, return X; } -static MCCodeGenInfo *createSparcV9MCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createSparcV9MCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index bccc6bdd53eb..8fa10dcae114 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -190,11 +190,11 @@ static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI) { for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) - if (MRI->isPhysRegUsed(reg)) + if (!MRI->reg_nodbg_empty(reg)) return false; for (unsigned reg = SP::L0; reg <= SP::L7; ++reg) - if (MRI->isPhysRegUsed(reg)) + if (!MRI->reg_nodbg_empty(reg)) return false; return true; @@ -206,10 +206,10 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); - return !(MFI->hasCalls() // has calls - || MRI.isPhysRegUsed(SP::L0) // Too many registers needed - || MRI.isPhysRegUsed(SP::O6) // %SP is used - || hasFP(MF)); // need %FP + return !(MFI->hasCalls() // has calls + || !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed + || !MRI.reg_nodbg_empty(SP::O6) // %SP is used + || hasFP(MF)); // need %FP } void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { @@ -218,16 +218,13 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { // Remap %i[0-7] to %o[0-7]. for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { - if (!MRI.isPhysRegUsed(reg)) + if (MRI.reg_nodbg_empty(reg)) continue; unsigned mapped_reg = (reg - SP::I0 + SP::O0); - assert(!MRI.isPhysRegUsed(mapped_reg)); + assert(MRI.reg_nodbg_empty(mapped_reg)); // Replace I register with O register. MRI.replaceRegWith(reg, mapped_reg); - - // Mark the reg unused. - MRI.setPhysRegUnused(reg); } // Rewrite MBB's Live-ins. 
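The Sparc leaf-procedure checks above replace MRI.isPhysRegUsed(Reg) with !MRI.reg_nodbg_empty(Reg), i.e. "this physical register has at least one non-debug def or use in the function". A minimal sketch of the predicate; the helper name and include are assumptions:

#include "llvm/CodeGen/MachineRegisterInfo.h"

// True when Reg appears in at least one non-debug operand of the function.
static bool physRegHasNonDebugUses(const llvm::MachineRegisterInfo &MRI,
                                   unsigned Reg) {
  return !MRI.reg_nodbg_empty(Reg);
}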
@@ -247,9 +244,10 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { #endif } -void SparcFrameLowering::processFunctionBeforeCalleeSavedScan - (MachineFunction &MF, RegScavenger *RS) const { - +void SparcFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); if (!DisableLeafProc && isLeafProc(MF)) { SparcMachineFunctionInfo *MFI = MF.getInfo<SparcMachineFunctionInfo>(); MFI->setLeafProc(true); diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index bb3b78861cbd..29fc7b7ba036 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -36,8 +36,8 @@ public: bool hasReservedCallFrame(const MachineFunction &MF) const override; bool hasFP(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; private: // Remap input registers to output registers for leaf procedure. diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 9c594a9f0f65..340b72e7940f 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -67,13 +67,16 @@ private: SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = Subtarget->getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode(); + return CurDAG->getRegister(GlobalBaseReg, + TLI->getPointerTy(CurDAG->getDataLayout())) + .getNode(); } bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, SDValue &Base, SDValue &Offset) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FIN->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout())); Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); return true; } @@ -88,8 +91,8 @@ bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr, if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { // Constant offset from frame ref. - Base = - CurDAG->getTargetFrameIndex(FIN->getIndex(), TLI->getPointerTy()); + Base = CurDAG->getTargetFrameIndex( + FIN->getIndex(), TLI->getPointerTy(CurDAG->getDataLayout())); } else { Base = Addr.getOperand(0); } @@ -134,7 +137,7 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { } R1 = Addr; - R2 = CurDAG->getRegister(SP::G0, TLI->getPointerTy()); + R2 = CurDAG->getRegister(SP::G0, TLI->getPointerTy(CurDAG->getDataLayout())); return true; } @@ -168,10 +171,9 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { } else { TopPart = CurDAG->getRegister(SP::G0, MVT::i32); } - TopPart = SDValue(CurDAG->getMachineNode(SP::WRASRrr, dl, MVT::i32, - TopPart, - CurDAG->getRegister(SP::G0, MVT::i32)), 0); - TopPart = CurDAG->getCopyToReg(TopPart, dl, SP::Y, TopPart, SDValue()).getValue(1); + TopPart = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SP::Y, TopPart, + SDValue()) + .getValue(1); // FIXME: Handle div by immediate. unsigned Opcode = N->getOpcode() == ISD::SDIV ? 
SP::SDIVrr : SP::UDIVrr; @@ -184,12 +186,11 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { SDValue MulLHS = N->getOperand(0); SDValue MulRHS = N->getOperand(1); unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr; - SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue, - MulLHS, MulRHS); - // The high part is in the Y register. - return CurDAG->SelectNodeTo(N, SP::RDASR, MVT::i32, - CurDAG->getRegister(SP::Y, MVT::i32), - SDValue(Mul, 1)); + SDNode *Mul = + CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, MulLHS, MulRHS); + SDValue ResultHigh = SDValue(Mul, 1); + ReplaceUses(SDValue(N, 0), ResultHigh); + return nullptr; } } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 0481676dc1ac..4879d4ee79e5 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -221,10 +221,11 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, unsigned Reg = SFI->getSRetReturnReg(); if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); - SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, PtrVT); Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag); Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(SP::I0, getPointerTy())); + RetOps.push_back(DAG.getRegister(SP::I0, PtrVT)); RetAddrOffset = 12; // CallInst + Delay Slot + Unimp } @@ -418,6 +419,7 @@ LowerFormalArguments_32(SDValue Chain, assert(VA.isMemLoc()); unsigned Offset = VA.getLocMemOffset()+StackOffset; + auto PtrVT = getPointerTy(DAG.getDataLayout()); if (VA.needsCustom()) { assert(VA.getValVT() == MVT::f64); @@ -426,7 +428,7 @@ LowerFormalArguments_32(SDValue Chain, int FI = MF.getFrameInfo()->CreateFixedObject(8, Offset, true); - SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT); SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo(), false,false, false, 0); @@ -437,14 +439,14 @@ LowerFormalArguments_32(SDValue Chain, int FI = MF.getFrameInfo()->CreateFixedObject(4, Offset, true); - SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT); SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo(), false, false, false, 0); int FI2 = MF.getFrameInfo()->CreateFixedObject(4, Offset+4, true); - SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy()); + SDValue FIPtr2 = DAG.getFrameIndex(FI2, PtrVT); SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2, MachinePointerInfo(), @@ -460,7 +462,7 @@ LowerFormalArguments_32(SDValue Chain, int FI = MF.getFrameInfo()->CreateFixedObject(4, Offset, true); - SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT); SDValue Load ; if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) { Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, @@ -607,10 +609,10 @@ LowerFormalArguments_64(SDValue Chain, if (VA.isExtInLoc()) Offset += 8 - ValSize; int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true); - InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, - DAG.getFrameIndex(FI, getPointerTy()), - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0)); + InVals.push_back(DAG.getLoad( + VA.getValVT(), DL, Chain, + DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())), + MachinePointerInfo::getFixedStack(FI), 
false, false, false, 0)); } if (!IsVarArg) @@ -637,10 +639,10 @@ LowerFormalArguments_64(SDValue Chain, unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass); SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true); - OutChains.push_back(DAG.getStore(Chain, DL, VArg, - DAG.getFrameIndex(FI, getPointerTy()), - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + auto PtrVT = getPointerTy(MF.getDataLayout()); + OutChains.push_back( + DAG.getStore(Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT), + MachinePointerInfo::getFixedStack(FI), false, false, 0)); } if (!OutChains.empty()) @@ -722,7 +724,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, unsigned Align = Flags.getByValAlign(); int FI = MFI->CreateStackObject(Size, Align, false); - SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue SizeNode = DAG.getConstant(Size, dl, MVT::i32); Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, @@ -993,7 +995,7 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); Type *ElementTy = Ty->getElementType(); - return getDataLayout()->getTypeAllocSize(ElementTy); + return DAG.getDataLayout().getTypeAllocSize(ElementTy); } @@ -1057,6 +1059,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, SelectionDAG &DAG = CLI.DAG; SDLoc DL = CLI.DL; SDValue Chain = CLI.Chain; + auto PtrVT = getPointerTy(DAG.getDataLayout()); // Sparc target does not yet support tail call optimization. CLI.IsTailCall = false; @@ -1130,13 +1133,11 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, // Store and reload into the interger register reg and reg+1. unsigned Offset = 8 * (VA.getLocReg() - SP::I0); unsigned StackOffset = Offset + Subtarget->getStackPointerBias() + 128; - SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy()); + SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT); SDValue HiPtrOff = DAG.getIntPtrConstant(StackOffset, DL); - HiPtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, - HiPtrOff); + HiPtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, HiPtrOff); SDValue LoPtrOff = DAG.getIntPtrConstant(StackOffset + 8, DL); - LoPtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, - LoPtrOff); + LoPtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, LoPtrOff); // Store to %sp+BIAS+128+Offset SDValue Store = DAG.getStore(Chain, DL, Arg, HiPtrOff, @@ -1180,13 +1181,13 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, assert(VA.isMemLoc()); // Create a store off the stack pointer for this argument. - SDValue StackPtr = DAG.getRegister(SP::O6, getPointerTy()); + SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT); // The argument area starts at %fp+BIAS+128 in the callee frame, // %sp+BIAS+128 in ours. SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + Subtarget->getStackPointerBias() + 128, DL); - PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); MemOpChains.push_back(DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); @@ -1215,10 +1216,9 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, unsigned TF = ((getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 
SparcMCExpr::VK_Sparc_WPLT30 : 0); if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0, - TF); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT, 0, TF); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) - Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy(), TF); + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, TF); // Build the operands for the call instruction itself. SmallVector<SDValue, 8> Ops; @@ -1370,6 +1370,8 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) { SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, const SparcSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { + auto &DL = *TM.getDataLayout(); + // Set up the register classes. addRegisterClass(MVT::i32, &SP::IntRegsRegClass); addRegisterClass(MVT::f32, &SP::FPRegsRegClass); @@ -1394,10 +1396,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, setTruncStoreAction(MVT::f128, MVT::f64, Expand); // Custom legalize GlobalAddress nodes into LO/HI parts. - setOperationAction(ISD::GlobalAddress, getPointerTy(), Custom); - setOperationAction(ISD::GlobalTLSAddress, getPointerTy(), Custom); - setOperationAction(ISD::ConstantPool, getPointerTy(), Custom); - setOperationAction(ISD::BlockAddress, getPointerTy(), Custom); + setOperationAction(ISD::GlobalAddress, getPointerTy(DL), Custom); + setOperationAction(ISD::GlobalTLSAddress, getPointerTy(DL), Custom); + setOperationAction(ISD::ConstantPool, getPointerTy(DL), Custom); + setOperationAction(ISD::BlockAddress, getPointerTy(DL), Custom); // Sparc doesn't have sext_inreg, replace them with shl/sra setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -1704,7 +1706,8 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; } -EVT SparcTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT SparcTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, + EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -1804,7 +1807,7 @@ SDValue SparcTargetLowering::makeHiLoPair(SDValue Op, // or ExternalSymbol SDNode. SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - EVT VT = getPointerTy(); + EVT VT = getPointerTy(DAG.getDataLayout()); // Handle PIC mode first. if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { @@ -1871,7 +1874,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op, GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = getTargetMachine().getTLSModel(GV); @@ -1983,7 +1986,7 @@ SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args, if (ArgTy->isFP128Ty()) { // Create a stack object and pass the pointer to the library function. 
int FI = MFI->CreateStackObject(16, 8, false); - SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); Chain = DAG.getStore(Chain, DL, Entry.Node, @@ -2008,8 +2011,9 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG, ArgListTy Args; MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); - SDValue Callee = DAG.getExternalSymbol(LibFuncName, getPointerTy()); + SDValue Callee = DAG.getExternalSymbol(LibFuncName, PtrVT); Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext()); Type *RetTyABI = RetTy; SDValue Chain = DAG.getEntryNode(); @@ -2019,7 +2023,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG, // Create a Stack Object to receive the return value of type f128. ArgListEntry Entry; int RetFI = MFI->CreateStackObject(16, 8, false); - RetPtr = DAG.getFrameIndex(RetFI, getPointerTy()); + RetPtr = DAG.getFrameIndex(RetFI, PtrVT); Entry.Node = RetPtr; Entry.Ty = PointerType::getUnqual(RetTy); if (!Subtarget->is64Bit()) @@ -2082,7 +2086,8 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, case SPCC::FCC_UE : LibCall = is64Bit? "_Qp_cmp" : "_Q_cmp"; break; } - SDValue Callee = DAG.getExternalSymbol(LibCall, getPointerTy()); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Callee = DAG.getExternalSymbol(LibCall, PtrVT); Type *RetTy = Type::getInt32Ty(*DAG.getContext()); ArgListTy Args; SDValue Chain = DAG.getEntryNode(); @@ -2362,6 +2367,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, const SparcTargetLowering &TLI) { MachineFunction &MF = DAG.getMachineFunction(); SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); // Need frame address to find the address of VarArgsFrameIndex. MF.getFrameInfo()->setFrameAddressIsTaken(true); @@ -2370,9 +2376,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, // memory location argument. SDLoc DL(Op); SDValue Offset = - DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), - DAG.getRegister(SP::I6, TLI.getPointerTy()), - DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL)); + DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(SP::I6, PtrVT), + DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); @@ -2497,8 +2502,8 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG, SDValue RetAddr; if (depth == 0) { - unsigned RetReg = MF.addLiveIn(SP::I7, - TLI.getRegClassFor(TLI.getPointerTy())); + auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + unsigned RetReg = MF.addLiveIn(SP::I7, TLI.getRegClassFor(PtrVT)); RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT); return RetAddr; } @@ -3065,7 +3070,7 @@ SparcTargetLowering::expandAtomicRMW(MachineInstr *MI, /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. 
SparcTargetLowering::ConstraintType -SparcTargetLowering::getConstraintType(const std::string &Constraint) const { +SparcTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; @@ -3139,7 +3144,7 @@ LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass *> SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index b6bc3d255713..bbc91a493c9d 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -72,7 +72,7 @@ namespace llvm { const char *getTargetNodeName(unsigned Opcode) const override; - ConstraintType getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override; @@ -82,14 +82,16 @@ namespace llvm { SelectionDAG &DAG) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } /// getSetCCResultType - Return the ISD::SETCC ValueType - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; SDValue LowerFormalArguments(SDValue Chain, diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td index 670e9e989c81..25cc652dbd9e 100644 --- a/lib/Target/Sparc/SparcInstrAliases.td +++ b/lib/Target/Sparc/SparcInstrAliases.td @@ -245,6 +245,7 @@ multiclass fp_cond_alias<string cond, int condVal> { } defm : int_cond_alias<"a", 0b1000>; +defm : int_cond_alias<"", 0b1000>; // same as a; gnu asm, not in manual defm : int_cond_alias<"n", 0b0000>; defm : int_cond_alias<"ne", 0b1001>; defm : int_cond_alias<"nz", 0b1001>; // same as ne @@ -266,6 +267,7 @@ defm : int_cond_alias<"vc", 0b1111>; defm : int_cond_alias<"vs", 0b0111>; defm : fp_cond_alias<"a", 0b0000>; +defm : fp_cond_alias<"", 0b0000>; // same as a; gnu asm, not in manual defm : fp_cond_alias<"n", 0b1000>; defm : fp_cond_alias<"u", 0b0111>; defm : fp_cond_alias<"g", 0b0110>; @@ -284,7 +286,16 @@ defm : fp_cond_alias<"le", 0b1101>; defm : fp_cond_alias<"ule", 0b1110>; defm : fp_cond_alias<"o", 0b1111>; -// Instruction aliases for JMPL. +// Section A.3 Synthetic Instructions + +// Most are marked as Emit=0, so that they are not used for disassembly. This is +// an aesthetic issue, but the chosen policy is to typically prefer using the +// non-alias form, except for the most obvious and clarifying aliases: cmp, jmp, +// call, tst, ret, retl. + +// Note: cmp is handled in SparcInstrInfo. 
+// jmp/call/ret/retl have special case handling for output in +// SparcInstPrinter.cpp // jmp addr -> jmpl addr, %g0 def : InstAlias<"jmp $addr", (JMPLrr G0, MEMrr:$addr), 0>; @@ -294,25 +305,129 @@ def : InstAlias<"jmp $addr", (JMPLri G0, MEMri:$addr), 0>; def : InstAlias<"call $addr", (JMPLrr O7, MEMrr:$addr), 0>; def : InstAlias<"call $addr", (JMPLri O7, MEMri:$addr), 0>; -// retl -> RETL 8 -def : InstAlias<"retl", (RETL 8)>; +// tst reg -> orcc %g0, reg, %g0 +def : InstAlias<"tst $rs2", (ORCCrr G0, IntRegs:$rs2, G0)>; -// ret -> RET 8 +// ret -> jmpl %i7+8, %g0 (aka RET 8) def : InstAlias<"ret", (RET 8)>; -// mov reg, rd -> or %g0, reg, rd -def : InstAlias<"mov $rs2, $rd", (ORrr IntRegs:$rd, G0, IntRegs:$rs2)>; +// retl -> jmpl %o7+8, %g0 (aka RETL 8) +def : InstAlias<"retl", (RETL 8)>; -// mov simm13, rd -> or %g0, simm13, rd -def : InstAlias<"mov $simm13, $rd", (ORri IntRegs:$rd, G0, i32imm:$simm13)>; +// restore -> restore %g0, %g0, %g0 +def : InstAlias<"restore", (RESTORErr G0, G0, G0)>; + +// save -> restore %g0, %g0, %g0 +def : InstAlias<"save", (SAVErr G0, G0, G0)>; // set value, rd // (turns into a sequence of sethi+or, depending on the value) // def : InstAlias<"set $val, $rd", (ORri IntRegs:$rd, (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>; def SET : AsmPseudoInst<(outs IntRegs:$rd), (ins i32imm:$val), "set $val, $rd">; -// restore -> restore %g0, %g0, %g0 -def : InstAlias<"restore", (RESTORErr G0, G0, G0)>; +// not rd -> xnor rd, %g0, rd +def : InstAlias<"not $rd", (XNORrr IntRegs:$rd, IntRegs:$rd, G0), 0>; + +// not reg, rd -> xnor reg, %g0, rd +def : InstAlias<"not $rs1, $rd", (XNORrr IntRegs:$rd, IntRegs:$rs1, G0), 0>; + +// neg rd -> sub %g0, rd, rd +def : InstAlias<"neg $rd", (SUBrr IntRegs:$rd, G0, IntRegs:$rd), 0>; + +// neg reg, rd -> sub %g0, reg, rd +def : InstAlias<"neg $rs2, $rd", (SUBrr IntRegs:$rd, G0, IntRegs:$rs2), 0>; + +// inc rd -> add rd, 1, rd +def : InstAlias<"inc $rd", (ADDri IntRegs:$rd, IntRegs:$rd, 1), 0>; + +// inc simm13, rd -> add rd, simm13, rd +def : InstAlias<"inc $simm13, $rd", (ADDri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// inccc rd -> addcc rd, 1, rd +def : InstAlias<"inccc $rd", (ADDCCri IntRegs:$rd, IntRegs:$rd, 1), 0>; + +// inccc simm13, rd -> addcc rd, simm13, rd +def : InstAlias<"inccc $simm13, $rd", (ADDCCri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// dec rd -> sub rd, 1, rd +def : InstAlias<"dec $rd", (SUBri IntRegs:$rd, IntRegs:$rd, 1), 0>; + +// dec simm13, rd -> sub rd, simm13, rd +def : InstAlias<"dec $simm13, $rd", (SUBri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// deccc rd -> subcc rd, 1, rd +def : InstAlias<"deccc $rd", (SUBCCri IntRegs:$rd, IntRegs:$rd, 1), 0>; + +// deccc simm13, rd -> subcc rd, simm13, rd +def : InstAlias<"deccc $simm13, $rd", (SUBCCri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// btst reg_or_imm, reg -> andcc reg,reg_or_imm,%g0 +def : InstAlias<"btst $rs2, $rs1", (ANDCCrr G0, IntRegs:$rs1, IntRegs:$rs2), 0>; +def : InstAlias<"btst $simm13, $rs1", (ANDCCri G0, IntRegs:$rs1, i32imm:$simm13), 0>; + +// bset reg_or_imm, rd -> or rd,reg_or_imm,rd +def : InstAlias<"bset $rs2, $rd", (ORrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>; +def : InstAlias<"bset $simm13, $rd", (ORri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// bclr reg_or_imm, rd -> andn rd,reg_or_imm,rd +def : InstAlias<"bclr $rs2, $rd", (ANDNrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>; +def : InstAlias<"bclr $simm13, $rd", (ANDNri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + +// btog reg_or_imm, rd 
-> xor rd,reg_or_imm,rd +def : InstAlias<"btog $rs2, $rd", (XORrr IntRegs:$rd, IntRegs:$rd, IntRegs:$rs2), 0>; +def : InstAlias<"btog $simm13, $rd", (XORri IntRegs:$rd, IntRegs:$rd, i32imm:$simm13), 0>; + + +// clr rd -> or %g0, %g0, rd +def : InstAlias<"clr $rd", (ORrr IntRegs:$rd, G0, G0), 0>; + +// clr{b,h,} [addr] -> st{b,h,} %g0, [addr] +def : InstAlias<"clrb [$addr]", (STBrr MEMrr:$addr, G0), 0>; +def : InstAlias<"clrb [$addr]", (STBri MEMri:$addr, G0), 0>; +def : InstAlias<"clrh [$addr]", (STHrr MEMrr:$addr, G0), 0>; +def : InstAlias<"clrh [$addr]", (STHri MEMri:$addr, G0), 0>; +def : InstAlias<"clr [$addr]", (STrr MEMrr:$addr, G0), 0>; +def : InstAlias<"clr [$addr]", (STri MEMri:$addr, G0), 0>; + + +// mov reg_or_imm, rd -> or %g0, reg_or_imm, rd +def : InstAlias<"mov $rs2, $rd", (ORrr IntRegs:$rd, G0, IntRegs:$rs2)>; +def : InstAlias<"mov $simm13, $rd", (ORri IntRegs:$rd, G0, i32imm:$simm13)>; + +// mov specialreg, rd -> rd specialreg, rd +def : InstAlias<"mov $asr, $rd", (RDASR IntRegs:$rd, ASRRegs:$asr), 0>; +def : InstAlias<"mov %psr, $rd", (RDPSR IntRegs:$rd), 0>; +def : InstAlias<"mov %wim, $rd", (RDWIM IntRegs:$rd), 0>; +def : InstAlias<"mov %tbr, $rd", (RDTBR IntRegs:$rd), 0>; + +// mov reg_or_imm, specialreg -> wr %g0, reg_or_imm, specialreg +def : InstAlias<"mov $rs2, $asr", (WRASRrr ASRRegs:$asr, G0, IntRegs:$rs2), 0>; +def : InstAlias<"mov $simm13, $asr", (WRASRri ASRRegs:$asr, G0, i32imm:$simm13), 0>; +def : InstAlias<"mov $rs2, %psr", (WRPSRrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"mov $simm13, %psr", (WRPSRri G0, i32imm:$simm13), 0>; +def : InstAlias<"mov $rs2, %wim", (WRWIMrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"mov $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>; +def : InstAlias<"mov $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"mov $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>; + +// End of Section A.3 + +// wr reg_or_imm, specialreg -> wr %g0, reg_or_imm, specialreg +// (aka: omit the first arg when it's g0. 
This is not in the manual, but is +// supported by gnu and solaris as) +def : InstAlias<"wr $rs2, $asr", (WRASRrr ASRRegs:$asr, G0, IntRegs:$rs2), 0>; +def : InstAlias<"wr $simm13, $asr", (WRASRri ASRRegs:$asr, G0, i32imm:$simm13), 0>; +def : InstAlias<"wr $rs2, %psr", (WRPSRrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"wr $simm13, %psr", (WRPSRri G0, i32imm:$simm13), 0>; +def : InstAlias<"wr $rs2, %wim", (WRWIMrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"wr $simm13, %wim", (WRWIMri G0, i32imm:$simm13), 0>; +def : InstAlias<"wr $rs2, %tbr", (WRTBRrr G0, IntRegs:$rs2), 0>; +def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>; + + +// flush -> flush %g0 +def : InstAlias<"flush", (FLUSH), 0>; + def : MnemonicAlias<"return", "rett">, Requires<[HasV9]>; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index f87cee43e319..6167c532db80 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -324,6 +324,15 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, numSubRegs = 4; movOpc = SP::FMOVS; } + } else if (SP::ASRRegsRegClass.contains(DestReg) && + SP::IntRegsRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(SP::WRASRrr), DestReg) + .addReg(SP::G0) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (SP::IntRegsRegClass.contains(DestReg) && + SP::ASRRegsRegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(SP::RDASR), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } else llvm_unreachable("Impossible reg-to-reg copy"); diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index a02bae07a336..3b9e048ea8b3 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -536,6 +536,7 @@ let Defs = [ICC] in let Uses = [ICC] in defm SUBC : F3_12np <"subx", 0b001100>; +// cmp (from Section A.3) is a specialized alias for subcc let Defs = [ICC], rd = 0 in { def CMPrr : F3_1<2, 0b010100, (outs), (ins IntRegs:$rs1, IntRegs:$rs2), @@ -559,12 +560,12 @@ let Defs = [Y, ICC] in { } // Section B.19 - Divide Instructions, p. 115 -let Defs = [Y] in { +let Uses = [Y], Defs = [Y] in { defm UDIV : F3_12np<"udiv", 0b001110>; defm SDIV : F3_12np<"sdiv", 0b001111>; } -let Defs = [Y, ICC] in { +let Uses = [Y], Defs = [Y, ICC] in { defm UDIVCC : F3_12np<"udivcc", 0b011110>; defm SDIVCC : F3_12np<"sdivcc", 0b011111>; } @@ -828,6 +829,20 @@ let rd = 0 in def UNIMP : F2_1<0b000, (outs), (ins i32imm:$imm22), "unimp $imm22", []>; +// Section B.32 - Flush Instruction Memory +let rd = 0 in { + def FLUSHrr : F3_1<2, 0b111011, (outs), (ins MEMrr:$addr), + "flush $addr", []>; + def FLUSHri : F3_2<2, 0b111011, (outs), (ins MEMri:$addr), + "flush $addr", []>; + + // The no-arg FLUSH is only here for the benefit of the InstAlias + // "flush", which cannot seem to use FLUSHrr, due to the inability + // to construct a MEMrr with fixed G0 registers. + let rs1 = 0, rs2 = 0 in + def FLUSH : F3_1<2, 0b111011, (outs), (ins), "flush %g0", []>; +} + // Section B.33 - Floating-point Operate (FPop) Instructions // Convert Integer to Floating-point Instructions, p. 
141 diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index e504da4d3b21..db8a7e86962d 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -249,4 +249,6 @@ def FCCRegs : RegisterClass<"SP", [i1], 1, (sequence "FCC%u", 0, 3)>; // Ancillary state registers def ASRRegs : RegisterClass<"SP", [i32], 32, - (add Y, (sequence "ASR%u", 1, 31))>; + (add Y, (sequence "ASR%u", 1, 31))> { + let isAllocatable = 0; +} diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp deleted file mode 100644 index a308fc5e739e..000000000000 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SparcSelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#include "SparcSelectionDAGInfo.h" -using namespace llvm; - -#define DEBUG_TYPE "sparc-selectiondag-info" - -SparcSelectionDAGInfo::SparcSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) { -} - -SparcSelectionDAGInfo::~SparcSelectionDAGInfo() { -} diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h deleted file mode 100644 index 6818291b30b4..000000000000 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the Sparc subclass for TargetSelectionDAGInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_SPARC_SPARCSELECTIONDAGINFO_H -#define LLVM_LIB_TARGET_SPARC_SPARCSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class SparcTargetMachine; - -class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit SparcSelectionDAGInfo(const DataLayout &DL); - ~SparcSelectionDAGInfo() override; -}; - -} - -#endif diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index 479b25d2723f..d69da409e428 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -54,7 +54,7 @@ SparcSubtarget::SparcSubtarget(const Triple &TT, const std::string &CPU, bool is64Bit) : SparcGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(*TM.getDataLayout()), FrameLowering(*this) {} + FrameLowering(*this) {} int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 983b1193975d..9d21911d88f0 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -17,9 +17,9 @@ #include "SparcFrameLowering.h" #include "SparcInstrInfo.h" #include "SparcISelLowering.h" -#include "SparcSelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -39,7 +39,7 @@ class SparcSubtarget : public SparcGenSubtargetInfo { bool UsePopc; SparcInstrInfo InstrInfo; SparcTargetLowering TLInfo; - SparcSelectionDAGInfo TSInfo; + TargetSelectionDAGInfo TSInfo; SparcFrameLowering FrameLowering; public: @@ -56,7 +56,7 @@ public: const SparcTargetLowering *getTargetLowering() const override { return &TLInfo; } - const SparcSelectionDAGInfo *getSelectionDAGInfo() const override { + const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 81882106fc46..5fefa315a4cf 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -148,7 +148,7 @@ static MCInstrInfo *createSystemZMCInstrInfo() { return X; } -static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createSystemZMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitSystemZMCRegisterInfo(X, SystemZ::R14D); return X; @@ -156,12 +156,11 @@ static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createSystemZMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitSystemZMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createSystemZMCSubtargetInfoImpl(TT, CPU, FS); } -static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createSystemZMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index a636b35635ce..397de472a6ee 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ 
b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -61,11 +61,12 @@ SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { return SpillOffsetTable; } -void SystemZFrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + MachineFrameInfo *MFFrame = MF.getFrameInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); bool HasFP = hasFP(MF); SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); @@ -77,17 +78,17 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // argument register R6D. if (IsVarArg) for (unsigned I = MFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I) - MRI.setPhysRegUsed(SystemZ::ArgGPRs[I]); + SavedRegs.set(SystemZ::ArgGPRs[I]); // If the function requires a frame pointer, record that the hard // frame pointer will be clobbered. if (HasFP) - MRI.setPhysRegUsed(SystemZ::R11D); + SavedRegs.set(SystemZ::R11D); // If the function calls other functions, record that the return // address register will be clobbered. if (MFFrame->hasCalls()) - MRI.setPhysRegUsed(SystemZ::R14D); + SavedRegs.set(SystemZ::R14D); // If we are saving GPRs other than the stack pointer, we might as well // save and restore the stack pointer at the same time, via STMG and LMG. @@ -96,8 +97,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); for (unsigned I = 0; CSRegs[I]; ++I) { unsigned Reg = CSRegs[I]; - if (SystemZ::GR64BitRegClass.contains(Reg) && MRI.isPhysRegUsed(Reg)) { - MRI.setPhysRegUsed(SystemZ::R15D); + if (SystemZ::GR64BitRegClass.contains(Reg) && SavedRegs.test(Reg)) { + SavedRegs.set(SystemZ::R15D); break; } } diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h index 60bad894ee44..5ade757f17f7 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/lib/Target/SystemZ/SystemZFrameLowering.h @@ -27,8 +27,8 @@ public: bool isFPCloseToIncomingSP() const override { return false; } const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 372f6fb3ea50..056ee02dcc21 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -81,10 +81,11 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { return Op; } -SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, +SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI) - : TargetLowering(tm), Subtarget(STI) { - MVT PtrVT = getPointerTy(); + : TargetLowering(TM), Subtarget(STI) { + auto &DL = *TM.getDataLayout(); + MVT PtrVT = getPointerTy(DL); // Set up the register classes. 
if (Subtarget.hasHighWord()) @@ -455,7 +456,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm, MaxStoresPerMemsetOptSize = 0; } -EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, + LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); @@ -507,8 +509,8 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return true; } -bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // Punt on globals for now, although they can be used in limited // RELATIVE LONG cases. @@ -544,7 +546,7 @@ bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { //===----------------------------------------------------------------------===// TargetLowering::ConstraintType -SystemZTargetLowering::getConstraintType(const std::string &Constraint) const { +SystemZTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'a': // Address register @@ -641,13 +643,14 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, // has already been verified. MC is the class associated with "t" and // Map maps 0-based register numbers to LLVM register numbers. static std::pair<unsigned, const TargetRegisterClass *> -parseRegisterNumber(const std::string &Constraint, - const TargetRegisterClass *RC, const unsigned *Map) { +parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, + const unsigned *Map) { assert(*(Constraint.end()-1) == '}' && "Missing '}'"); if (isdigit(Constraint[2])) { - std::string Suffix(Constraint.data() + 2, Constraint.size() - 2); - unsigned Index = atoi(Suffix.c_str()); - if (Index < 16 && Map[Index]) + unsigned Index; + bool Failed = + Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); + if (!Failed && Index < 16 && Map[Index]) return std::make_pair(Map[Index], RC); } return std::make_pair(0U, nullptr); @@ -655,8 +658,7 @@ parseRegisterNumber(const std::string &Constraint, std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering::getRegForInlineAsmConstraint( - const TargetRegisterInfo *TRI, const std::string &Constraint, - MVT VT) const { + const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { @@ -687,7 +689,7 @@ SystemZTargetLowering::getRegForInlineAsmConstraint( return std::make_pair(0U, &SystemZ::FP32BitRegClass); } } - if (Constraint[0] == '{') { + if (Constraint.size() > 0 && Constraint[0] == '{') { // We need to override the default register parsing for GPRs and FPRs // because the interpretation depends on VT. The internal names of // the registers are also different from the external names @@ -931,7 +933,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, // Create the SelectionDAG nodes corresponding to a load // from this parameter. Unpromoted ints and floats are // passed as right-justified 8-byte values. 
- EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, @@ -969,7 +971,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]); int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I], &SystemZ::FP64BitRegClass); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); @@ -1019,7 +1021,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(MF.getDataLayout()); // Detect unsupported vector argument and return types. if (Subtarget.hasVector()) { @@ -2401,7 +2403,7 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); int64_t Offset = Node->getOffset(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); Reloc::Model RM = DAG.getTarget().getRelocationModel(); CodeModel::Model CM = DAG.getTarget().getCodeModel(); @@ -2440,7 +2442,7 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, unsigned Opcode, SDValue GOTOffset) const { SDLoc DL(Node); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = DAG.getEntryNode(); SDValue Glue; @@ -2486,7 +2488,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); // The high part of the thread pointer is in access register 0. @@ -2587,7 +2589,7 @@ SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, SDLoc DL(Node); const BlockAddress *BA = Node->getBlockAddress(); int64_t Offset = Node->getOffset(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); @@ -2597,7 +2599,7 @@ SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const { SDLoc DL(JT); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); // Use LARL to load the address of the table. 
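(Editorial sketch, not part of the commit.) The change repeated across the Sparc and SystemZ lowering hunks above and below is a single API migration: TargetLowering::getPointerTy() no longer reads the DataLayout held by the TargetMachine and instead takes it as an explicit argument, usually fetched from the SelectionDAG or MachineFunction at the call site. A minimal illustration of the new call-site shape, assuming it sits inside LLVM target-lowering code (the helper name below is hypothetical, not from the commit):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;

// Hypothetical helper, for illustration only: build a frame-index node whose
// type is the pointer type derived from the DAG's DataLayout (the new form),
// rather than from a layout held by the TargetMachine (the old form removed
// throughout this commit).
static SDValue makeFrameIndexValue(const TargetLowering &TLI,
                                   SelectionDAG &DAG, int FI) {
  EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); // was: TLI.getPointerTy()
  return DAG.getFrameIndex(FI, PtrVT);
}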
@@ -2607,7 +2609,7 @@ SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const { SDLoc DL(CP); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; if (CP->isMachineConstantPoolEntry()) @@ -2671,7 +2673,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); SystemZMachineFunctionInfo *FuncInfo = MF.getInfo<SystemZMachineFunctionInfo>(); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 2f7617bbdac3..949b67f114ea 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -339,10 +339,10 @@ public: const SystemZSubtarget &STI); // Override TargetLowering. - MVT getScalarShiftAmountTy(EVT LHSTy) const override { + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } - MVT getVectorIdxTy() const override { + MVT getVectorIdxTy(const DataLayout &DL) const override { // Only the lower 12 bits of an element index are used, so we don't // want to clobber the upper 32 bits of a GPR unnecessarily. return MVT::i32; @@ -364,12 +364,13 @@ public: return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } - EVT getSetCCResultType(LLVMContext &, EVT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT) const override; bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; bool isLegalICmpImmediate(int64_t Imm) const override; bool isLegalAddImmediate(int64_t Imm) const override; - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, @@ -379,10 +380,9 @@ public: const char *getTargetNodeName(unsigned Opcode) const override; std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; TargetLowering::ConstraintType - getConstraintType(const std::string &Constraint) const override; + getConstraintType(StringRef Constraint) const override; TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override; @@ -391,8 +391,7 @@ public: std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode.size() == 1) { switch(ConstraintCode[0]) { default: diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 7cabea962e91..dc7bd25d7ed5 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -36,7 +36,7 @@ SystemZRegisterInfo::getCallPreservedMask(const MachineFunction &MF, BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const TargetFrameLowering *TFI = 
MF.getSubtarget().getFrameLowering(); + const SystemZFrameLowering *TFI = getFrameLowering(MF); if (TFI->hasFP(MF)) { // R11D is the frame pointer. Reserve all aliases. @@ -64,7 +64,7 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MachineFunction &MF = *MBB.getParent(); auto *TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo()); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const SystemZFrameLowering *TFI = getFrameLowering(MF); DebugLoc DL = MI->getDebugLoc(); // Decompose the frame index into a base and offset. @@ -135,6 +135,6 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, unsigned SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const SystemZFrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D; } diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index e7e0268dbb8a..178aa3817311 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -18,12 +18,6 @@ using namespace llvm; #define DEBUG_TYPE "systemz-selectiondag-info" -SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { -} - // Decide whether it is best to use a loop or straight-line code for // a block operation of Size bytes with source address Src and destination // address Dest. Sequence is the opcode to use for straight-line code diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index a257d6b55494..246fa3e5e656 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -22,8 +22,7 @@ class SystemZTargetMachine; class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit SystemZSelectionDAGInfo(const DataLayout &DL); - ~SystemZSelectionDAGInfo(); + explicit SystemZSelectionDAGInfo() = default; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, SDValue Dst, SDValue Src, diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index eb5e5c0b9ff8..0b49fcdd8f78 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -42,7 +42,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU, HasTransactionalExecution(false), HasProcessorAssist(false), HasVector(false), TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - TSInfo(*TM.getDataLayout()), FrameLowering() {} + TSInfo(), FrameLowering() {} // Return true if GV binds locally under reloc model RM. 
static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) { diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index e9cabe968eea..4b80973ed879 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -29,7 +29,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { public: explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. SystemZTTIImpl(const SystemZTTIImpl &Arg) @@ -37,18 +38,6 @@ public: SystemZTTIImpl(SystemZTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 0b05303f71bf..83174c20c8e9 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -150,8 +150,9 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const { } TargetIRAnalysis TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &) { return TargetTransformInfo(getDataLayout()); }); + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(F.getParent()->getDataLayout()); + }); } static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp index 87df7af84525..6a61fcdf0f86 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -19,7 +19,14 @@ using namespace llvm; //--------------------------------------------------------------------------- // TargetSubtargetInfo Class // -TargetSubtargetInfo::TargetSubtargetInfo() {} +TargetSubtargetInfo::TargetSubtargetInfo( + const Triple &TT, StringRef CPU, StringRef FS, + ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD, + const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, + const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, + const InstrStage *IS, const unsigned *OC, const unsigned *FP) + : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) { +} TargetSubtargetInfo::~TargetSubtargetInfo() {} diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt index df04c2a3460b..25de9eee0831 100644 --- a/lib/Target/WebAssembly/CMakeLists.txt +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -1,6 +1,7 @@ set(LLVM_TARGET_DEFINITIONS WebAssembly.td) tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM WebAssemblyGenRegisterInfo.inc -gen-register-info) tablegen(LLVM WebAssemblyGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(WebAssemblyCommonTableGen) diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index d248556c62d7..224aa773a80e 100644 --- 
a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -29,6 +29,9 @@ using namespace llvm; #define GET_SUBTARGETINFO_MC_DESC #include "WebAssemblyGenSubtargetInfo.inc" +#define GET_REGINFO_MC_DESC +#include "WebAssemblyGenRegisterInfo.inc" + static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI, const Triple &TT) { MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT); diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 24893daec7ea..eebf5b72f62b 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -47,6 +47,9 @@ MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, // Defines symbolic names for WebAssembly registers. This defines a mapping from // register name to register number. // +#define GET_REGINFO_ENUM +#include "WebAssemblyGenRegisterInfo.inc" + #define GET_SUBTARGETINFO_ENUM #include "WebAssemblyGenSubtargetInfo.inc" diff --git a/lib/Target/WebAssembly/Makefile b/lib/Target/WebAssembly/Makefile index 35d835c6506c..f102d73f6e86 100644 --- a/lib/Target/WebAssembly/Makefile +++ b/lib/Target/WebAssembly/Makefile @@ -12,7 +12,8 @@ LIBRARYNAME = LLVMWebAssemblyCodeGen TARGET = WebAssembly # Make sure that tblgen is run, first thing. -BUILT_SOURCES = WebAssemblyGenSubtargetInfo.inc WebAssemblyGenMCCodeEmitter.inc +BUILT_SOURCES = WebAssemblyGenRegisterInfo.inc WebAssemblyGenSubtargetInfo.inc \ + WebAssemblyGenMCCodeEmitter.inc DIRS = InstPrinter TargetInfo MCTargetDesc diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt index 7a71060a638f..63e02c455895 100644 --- a/lib/Target/WebAssembly/README.txt +++ b/lib/Target/WebAssembly/README.txt @@ -12,4 +12,15 @@ binary encoding of WebAssembly itself: * https://github.com/WebAssembly/design/blob/master/AstSemantics.md * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +Interesting work that remains to be done: +* Write a pass to restructurize irreducible control flow. This needs to be done + before register allocation to be efficient, because it may duplicate basic + blocks and WebAssembly performs register allocation at a whole-function + level. Note that LLVM's GPU code has such a pass, but it linearizes control + flow (e.g. both sides of branches execute and are masked) which is undesirable + for WebAssembly. +* Basic relooper to expose control flow as an AST. +* Figure out how to properly use MC for virtual ISAs. This may require some + refactoring of MC. + //===---------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 4eec02efbd94..4184eb6dc5a6 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -38,6 +38,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // WebAssembly does not produce floating-point exceptions on normal floating // point operations. setHasFloatingPointExceptions(false); + // We don't know the microarchitecture here, so just reduce register pressure. 
+ setSchedulingPreference(Sched::RegPressure); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index 35e88eec8573..64415658ed81 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// WebAssembly Atomic operand code-gen constructs. -// +/// +/// \file +/// \brief WebAssembly Atomic operand code-gen constructs. +/// //===----------------------------------------------------------------------===// // TODO: Implement atomic instructions. diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td new file mode 100644 index 000000000000..6b5b6cd54173 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -0,0 +1,21 @@ +//===- WebAssemblyInstrCall.td-WebAssembly Call codegen support -*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly Call operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +/* + * TODO(jfb): Add the following. + * + * call_direct: call function directly + * call_indirect: call function indirectly + * addressof: obtain a function pointer value for a given function + */ diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td new file mode 100644 index 000000000000..3fa29061b1de --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -0,0 +1,44 @@ +//===-- WebAssemblyInstrConv.td-WebAssembly Conversion support -*- tablegen -*-= +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly datatype conversions, truncations, reinterpretations, +/// promotions, and demotions operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +/* + * TODO(jfb): Add the following. 
+ * + * int32.wrap[int64]: wrap a 64-bit integer to a 32-bit integer + * int32.trunc_signed[float32]: truncate a 32-bit float to a signed 32-bit integer + * int32.trunc_signed[float64]: truncate a 64-bit float to a signed 32-bit integer + * int32.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 32-bit integer + * int32.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 32-bit integer + * int32.reinterpret[float32]: reinterpret the bits of a 32-bit float as a 32-bit integer + * int64.extend_signed[int32]: extend a signed 32-bit integer to a 64-bit integer + * int64.extend_unsigned[int32]: extend an unsigned 32-bit integer to a 64-bit integer + * int64.trunc_signed[float32]: truncate a 32-bit float to a signed 64-bit integer + * int64.trunc_signed[float64]: truncate a 64-bit float to a signed 64-bit integer + * int64.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 64-bit integer + * int64.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 64-bit integer + * int64.reinterpret[float64]: reinterpret the bits of a 64-bit float as a 64-bit integer + * float32.demote[float64]: demote a 64-bit float to a 32-bit float + * float32.cvt_signed[int32]: convert a signed 32-bit integer to a 32-bit float + * float32.cvt_signed[int64]: convert a signed 64-bit integer to a 32-bit float + * float32.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 32-bit float + * float32.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 32-bit float + * float32.reinterpret[int32]: reinterpret the bits of a 32-bit integer as a 32-bit float + * float64.promote[float32]: promote a 32-bit float to a 64-bit float + * float64.cvt_signed[int32]: convert a signed 32-bit integer to a 64-bit float + * float64.cvt_signed[int64]: convert a signed 64-bit integer to a 64-bit float + * float64.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 64-bit float + * float64.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 64-bit float + * float64.reinterpret[int64]: reinterpret the bits of a 64-bit integer as a 64-bit float + */ diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td new file mode 100644 index 000000000000..30ef6339d65a --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td @@ -0,0 +1,44 @@ +// WebAssemblyInstrFloat.td-WebAssembly Float codegen support ---*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly Floating-point operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +defm FADD : BinaryFP<fadd>; +defm FSUB : BinaryFP<fsub>; +defm FMUL : BinaryFP<fmul>; +defm FDIV : BinaryFP<fdiv>; +defm FABS : UnaryFP<fabs>; +defm FNEG : UnaryFP<fneg>; +defm COPYSIGN : BinaryFP<fcopysign>; +defm CEIL : UnaryFP<fceil>; +defm FLOOR : UnaryFP<ffloor>; +defm TRUNC : UnaryFP<ftrunc>; +defm NEARESTINT : UnaryFP<fnearbyint>; + +/* + * TODO(jfb): Add the following for 32-bit and 64-bit. + * + * float32.eq: compare equal + * float32.lt: less than + * float32.le: less than or equal + * float32.gt: greater than + * float32.ge: greater than or equal + */ + +defm SQRT : UnaryFP<fsqrt>; + +/* + * TODO(jfb): Add the following for 32-bit and 64-bit. 
+ * + * float32.min: minimum (binary operator); if either operand is NaN, returns NaN + * float32.max: maximum (binary operator); if either operand is NaN, returns NaN + */ diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td index 8bbf3e9ec87b..513c36fa2ec2 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// WebAssembly instruction format definitions. -// +/// +/// \file +/// \brief WebAssembly instruction format definitions. +/// //===----------------------------------------------------------------------===// // WebAssembly Instruction Format @@ -26,3 +27,29 @@ class I<dag oops, dag iops, list<dag> pattern, string cstr = ""> dag InOperandList = iops; let Pattern = pattern; } + +// Unary and binary instructions, for the local types that WebAssembly supports. +multiclass UnaryInt<SDNode node> { + def _I32 : I<(outs Int32:$dst), (ins Int32:$src), + [(set Int32:$dst, (node Int32:$src))]>; + def _I64 : I<(outs Int64:$dst), (ins Int64:$src), + [(set Int64:$dst, (node Int64:$src))]>; +} +multiclass BinaryInt<SDNode node> { + def _I32 : I<(outs Int32:$dst), (ins Int32:$lhs, Int32:$rhs), + [(set Int32:$dst, (node Int32:$lhs, Int32:$rhs))]>; + def _I64 : I<(outs Int64:$dst), (ins Int64:$lhs, Int64:$rhs), + [(set Int64:$dst, (node Int64:$lhs, Int64:$rhs))]>; +} +multiclass UnaryFP<SDNode node> { + def _F32 : I<(outs Float32:$dst), (ins Float32:$src), + [(set Float32:$dst, (node Float32:$src))]>; + def _F64 : I<(outs Float64:$dst), (ins Float64:$src), + [(set Float64:$dst, (node Float64:$src))]>; +} +multiclass BinaryFP<SDNode node> { + def _F32 : I<(outs Float32:$dst), (ins Float32:$lhs, Float32:$rhs), + [(set Float32:$dst, (node Float32:$lhs, Float32:$rhs))]>; + def _F64 : I<(outs Float64:$dst), (ins Float64:$lhs, Float64:$rhs), + [(set Float64:$dst, (node Float64:$lhs, Float64:$rhs))]>; +} diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 142eccfbcaa5..fe3ca76dc08a 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// WebAssembly Instruction definitions. -// +/// +/// \file +/// \brief WebAssembly Instruction definitions. +/// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -32,6 +33,13 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">, // WebAssembly-specific Operands. //===----------------------------------------------------------------------===// +/* + * TODO(jfb): Add the following. + * + * get_local: read the current value of a local variable + * set_local: set the current value of a local variable +*/ + //===----------------------------------------------------------------------===// // WebAssembly Instruction Format Definitions. //===----------------------------------------------------------------------===// @@ -42,5 +50,10 @@ include "WebAssemblyInstrFormats.td" // Additional sets of instructions. 
//===----------------------------------------------------------------------===// +include "WebAssemblyInstrMemory.td" +include "WebAssemblyInstrCall.td" +include "WebAssemblyInstrInteger.td" +include "WebAssemblyInstrFloat.td" +include "WebAssemblyInstrConv.td" include "WebAssemblyInstrAtomics.td" include "WebAssemblyInstrSIMD.td" diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td new file mode 100644 index 000000000000..5f60fe81b1a2 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td @@ -0,0 +1,45 @@ +// WebAssemblyInstrInteger.td-WebAssembly Integer codegen -------*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly Integer operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +defm ADD : BinaryInt<add>; +defm SUB : BinaryInt<sub>; +defm MUL : BinaryInt<mul>; +defm SDIV : BinaryInt<sdiv>; +defm UDIV : BinaryInt<udiv>; +defm SREM : BinaryInt<srem>; +defm UREM : BinaryInt<urem>; +defm AND : BinaryInt<and>; +defm IOR : BinaryInt<or>; +defm XOR : BinaryInt<xor>; +defm SHL : BinaryInt<shl>; +defm SHR : BinaryInt<srl>; +defm SAR : BinaryInt<sra>; + +/* + * TODO(jfb): Add the following for 32-bit and 64-bit. + * + * int32.eq: signed-less compare equal + * int32.slt: signed less than + * int32.sle: signed less than or equal + * int32.ult: unsigned less than + * int32.ule: unsigned less than or equal + * int32.sgt: signed greater than + * int32.sge: signed greater than or equal + * int32.ugt: unsigned greater than + * int32.uge: unsigned greater than or equal + */ + +defm CLZ : UnaryInt<ctlz>; +defm CTZ : UnaryInt<cttz>; +defm POPCNT : UnaryInt<ctpop>; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td new file mode 100644 index 000000000000..5ab40e826caa --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -0,0 +1,46 @@ +// WebAssemblyInstrMemory.td-WebAssembly Memory codegen support -*- tablegen -*- +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief WebAssembly Memory operand code-gen constructs. +/// +//===----------------------------------------------------------------------===// + +/* + * TODO(jfb): Add the following. + * Each has optional alignment and immediate byte offset. 
+ * + * int32.load_sx[int8]: sign-extend to int32 + * int32.load_sx[int16]: sign-extend to int32 + * int32.load_zx[int8]: zero-extend to int32 + * int32.load_zx[int16]: zero-extend to int32 + * int32.load[int32]: (no conversion) + * int64.load_sx[int8]: sign-extend to int64 + * int64.load_sx[int16]: sign-extend to int64 + * int64.load_sx[int32]: sign-extend to int64 + * int64.load_zx[int8]: zero-extend to int64 + * int64.load_zx[int16]: zero-extend to int64 + * int64.load_zx[int32]: zero-extend to int64 + * int64.load[int64]: (no conversion) + * float32.load[float32]: (no conversion) + * float64.load[float64]: (no conversion) + * + * int32.store[int8]: wrap int32 to int8 + * int32.store[int16]: wrap int32 to int16 + * int32.store[int32]: (no conversion) + * int64.store[int8]: wrap int64 to int8 + * int64.store[int16]: wrap int64 to int16 + * int64.store[int32]: wrap int64 to int32 + * int64.store[int64]: (no conversion) + * float32.store[float32]: (no conversion) + * float64.store[float64]: (no conversion) + * + * load_global: load the value of a given global variable + * store_global: store a given value to a given global variable + */ diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index e25483ad3f7a..3e29906219d2 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// WebAssembly SIMD operand code-gen constructs. -// +/// +/// \file +/// \brief WebAssembly SIMD operand code-gen constructs. +/// //===----------------------------------------------------------------------===// // TODO: Implement SIMD instructions. 
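A brief aside on the load_sx/load_zx distinction in the memory TODO list above: a sign-extending and a zero-extending load of the same byte produce different 32-bit results. The following is a minimal, self-contained C++ sketch for illustration only; it is not part of the patch.

#include <cstdint>
#include <cstdio>

int main() {
  // Pretend this byte was read from WebAssembly linear memory.
  uint8_t Byte = 0xFF;
  // int32.load_sx[int8]: treat the byte as signed, then widen.
  int32_t SX = static_cast<int32_t>(static_cast<int8_t>(Byte)); // -1 (two's complement)
  // int32.load_zx[int8]: treat the byte as unsigned, then widen.
  int32_t ZX = static_cast<int32_t>(Byte);                      // 255
  std::printf("sx=%d zx=%d\n", SX, ZX);
  return 0;
}

The int32.store[int8] entry in the same list is the inverse direction: only the low 8 bits of the 32-bit value are kept.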
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index ad24c90af6a2..385c40bf6693 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -30,4 +30,58 @@ using namespace llvm; #define DEBUG_TYPE "wasm-reg-info" -WebAssemblyRegisterInfo::WebAssemblyRegisterInfo(const Triple &TT) : TT(TT) {} +#define GET_REGINFO_TARGET_DESC +#include "WebAssemblyGenRegisterInfo.inc" + +WebAssemblyRegisterInfo::WebAssemblyRegisterInfo(const Triple &TT) + : WebAssemblyGenRegisterInfo(0), TT(TT) {} + +const MCPhysReg * +WebAssemblyRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const { + static const MCPhysReg CalleeSavedRegs[] = {0}; + return CalleeSavedRegs; +} + +BitVector +WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + for (auto Reg : {WebAssembly::SP32, WebAssembly::SP64, WebAssembly::FP32, + WebAssembly::FP64}) + Reserved.set(Reg); + return Reserved; +} + +void WebAssemblyRegisterInfo::eliminateFrameIndex( + MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + llvm_unreachable("WebAssemblyRegisterInfo::eliminateFrameIndex"); // FIXME +} + +unsigned +WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + static const unsigned Regs[2][2] = { + /* !isArch64Bit isArch64Bit */ + /* !hasFP */ {WebAssembly::SP32, WebAssembly::SP64}, + /* hasFP */ {WebAssembly::FP32, WebAssembly::FP64}}; + const WebAssemblyFrameLowering *TFI = getFrameLowering(MF); + return Regs[TFI->hasFP(MF)][TT.isArch64Bit()]; +} + +bool WebAssemblyRegisterInfo::canRealignStack(const MachineFunction &MF) const { + return !MF.getFunction()->hasFnAttribute("no-realign-stack"); +} + +// FIXME: share this with other backends with identical implementation? +bool WebAssemblyRegisterInfo::needsStackRealignment( + const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const WebAssemblyFrameLowering *TFI = getFrameLowering(MF); + const Function *F = MF.getFunction(); + unsigned StackAlign = TFI->getStackAlignment(); + bool requiresRealignment = + ((MFI->getMaxAlignment() > StackAlign) || + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)); + + return requiresRealignment && canRealignStack(MF); +} diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h index 55300287a51e..dbdb9d0457af 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h @@ -16,6 +16,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYREGISTERINFO_H +#define GET_REGINFO_HEADER +#include "WebAssemblyGenRegisterInfo.inc" + namespace llvm { class MachineFunction; @@ -23,11 +26,25 @@ class RegScavenger; class TargetRegisterClass; class Triple; -class WebAssemblyRegisterInfo final { +class WebAssemblyRegisterInfo final : public WebAssemblyGenRegisterInfo { const Triple &TT; public: explicit WebAssemblyRegisterInfo(const Triple &TT); + + // Code Generation virtual methods. 
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + BitVector getReservedRegs(const MachineFunction &MF) const override; + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + // Debug information queries. + unsigned getFrameRegister(const MachineFunction &MF) const override; + + // Base pointer (stack realignment) support. + bool canRealignStack(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td index 7b3d636a2605..2ba42eb94a40 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td @@ -6,10 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file describes the WebAssembly register classes and some nominal -// physical registers. -// +/// +/// \file +/// \brief This file describes the WebAssembly register classes and some nominal +/// physical registers. +/// //===----------------------------------------------------------------------===// class WebAssemblyReg<string n> : Register<n> { @@ -23,6 +24,31 @@ class WebAssemblyRegClass<list<ValueType> regTypes, int alignment, dag regList> // Registers //===----------------------------------------------------------------------===// +// Special registers used as the frame and stack pointer. +// +// WebAssembly may someday supports mixed 32-bit and 64-bit heaps in the same +// application, which requires separate width FP and SP. +def FP32 : WebAssemblyReg<"%FP32">; +def FP64 : WebAssemblyReg<"%FP64">; +def SP32 : WebAssemblyReg<"%SP32">; +def SP64 : WebAssemblyReg<"%SP64">; + +// TODO(jfb) The following comes from NVPTX. Is it really needed, or can we do +// away with it? Try deleting once the backend works. +// WebAssembly uses virtual registers, but the backend defines a few physical +// registers here to keep SDAG and the MachineInstr layers happy. 
+foreach i = 0-4 in { + def I#i : WebAssemblyReg<"%i."#i>; // i32 + def L#i : WebAssemblyReg<"%l."#i>; // i64 + def F#i : WebAssemblyReg<"%f."#i>; // f32 + def D#i : WebAssemblyReg<"%d."#i>; // f64 +} + //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// + +def Int32 : WebAssemblyRegClass<[i32], 32, (add (sequence "I%u", 0, 4), SP32)>; +def Int64 : WebAssemblyRegClass<[i64], 64, (add (sequence "L%u", 0, 4), SP64)>; +def Float32 : WebAssemblyRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>; +def Float64 : WebAssemblyRegClass<[f64], 64, (add (sequence "D%u", 0, 4))>; diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp index cfd1bafff236..fae9c6100510 100644 --- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp @@ -17,7 +17,4 @@ using namespace llvm; #define DEBUG_TYPE "wasm-selectiondag-info" -WebAssemblySelectionDAGInfo::WebAssemblySelectionDAGInfo(const DataLayout *DL) - : TargetSelectionDAGInfo(DL) {} - WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() {} diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h index 03e8d393558d..13d96671276d 100644 --- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h +++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h @@ -22,7 +22,6 @@ namespace llvm { class WebAssemblySelectionDAGInfo final : public TargetSelectionDAGInfo { public: - explicit WebAssemblySelectionDAGInfo(const DataLayout *DL); ~WebAssemblySelectionDAGInfo() override; }; diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index addea8e3cc36..3d9e7aacbfbf 100644 --- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -42,7 +42,7 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, const TargetMachine &TM) : WebAssemblyGenSubtargetInfo(TT, CPU, FS), HasSIMD128(false), CPUString(CPU), TargetTriple(TT), FrameLowering(), - InstrInfo(initializeSubtargetDependencies(FS)), - TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {} + InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), + TLInfo(TM, *this) {} bool WebAssemblySubtarget::enableMachineScheduler() const { return true; } diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 08bd88c06985..7ffb6047b963 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -31,7 +31,6 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> { typedef TargetTransformInfo TTI; friend BaseT; - const WebAssemblyTargetMachine *TM; const WebAssemblySubtarget *ST; const WebAssemblyTargetLowering *TLI; @@ -40,30 +39,15 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> { public: WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F) - : BaseT(TM), TM(TM), ST(TM->getSubtargetImpl(F)), + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. 
WebAssemblyTTIImpl(const WebAssemblyTTIImpl &Arg) - : BaseT(static_cast<const BaseT &>(Arg)), TM(Arg.TM), ST(Arg.ST), - TLI(Arg.TLI) {} + : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} WebAssemblyTTIImpl(WebAssemblyTTIImpl &&Arg) - : BaseT(std::move(static_cast<BaseT &>(Arg))), TM(std::move(Arg.TM)), - ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - WebAssemblyTTIImpl &operator=(const WebAssemblyTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - TM = RHS.TM; - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - WebAssemblyTTIImpl &operator=(WebAssemblyTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - TM = std::move(RHS.TM); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } + : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 3cad9fa1e2ae..91b144a44824 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -878,6 +878,29 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DestName = getRegName(MI->getOperand(0).getReg()); break; + case X86::EXTRQI: + if (MI->getOperand(2).isImm() && + MI->getOperand(3).isImm()) + DecodeEXTRQIMask(MI->getOperand(2).getImm(), + MI->getOperand(3).getImm(), + ShuffleMask); + + DestName = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; + + case X86::INSERTQI: + if (MI->getOperand(3).isImm() && + MI->getOperand(4).isImm()) + DecodeINSERTQIMask(MI->getOperand(3).getImm(), + MI->getOperand(4).getImm(), + ShuffleMask); + + DestName = getRegName(MI->getOperand(0).getReg()); + Src1Name = getRegName(MI->getOperand(1).getReg()); + Src2Name = getRegName(MI->getOperand(2).getReg()); + break; + case X86::PMOVZXBWrr: case X86::PMOVZXBDrr: case X86::PMOVZXBQrr: diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 3e0dc1424609..629802f5dc5e 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -220,7 +220,6 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) { case X86::PUSH32i8: return X86::PUSHi32; case X86::PUSH16i8: return X86::PUSHi16; case X86::PUSH64i8: return X86::PUSH64i32; - case X86::PUSH64i16: return X86::PUSH64i32; } } diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp index 89f394582631..ddb764facdbf 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp @@ -34,14 +34,16 @@ public: report_fatal_error(EC.message()); StringRef SymName = *SymNameOrErr; - uint64_t SymAddr; SymI->getAddress(SymAddr); + ErrorOr<uint64_t> SymAddr = SymI->getAddress(); + if (std::error_code EC = SymAddr.getError()) + report_fatal_error(EC.message()); uint64_t SymSize = SymI->getSize(); int64_t Addend = *ELFRelocationRef(Rel).getAddend(); MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName); // FIXME: check that the value is actually the same. if (!Sym->isVariable()) - Sym->setVariableValue(MCConstantExpr::create(SymAddr, Ctx)); + Sym->setVariableValue(MCConstantExpr::create(*SymAddr, Ctx)); const MCExpr *Expr = nullptr; // If hasAddend is true, then we need to add Addend (r_addend) to Expr. 
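The X86ELFRelocationInfo.cpp hunk above switches getAddress() from an out-parameter to a returned ErrorOr<uint64_t>, so the error is checked before the address is used. A rough, self-contained sketch of that calling pattern follows; ErrorOrLike and lookupAddress are simplified stand-ins invented for this illustration, while the real code uses llvm::ErrorOr from Support/ErrorOr.h.

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <system_error>

// Simplified stand-in for llvm::ErrorOr<T>: holds either an error code or a value.
template <typename T> struct ErrorOrLike {
  std::error_code EC;
  T Value;
  std::error_code getError() const { return EC; }
  const T &operator*() const { return Value; }
};

// Hypothetical symbol-address lookup that can fail.
ErrorOrLike<uint64_t> lookupAddress(bool Known) {
  if (!Known)
    return {std::make_error_code(std::errc::invalid_argument), 0};
  return {std::error_code(), 0x401000};
}

int main() {
  ErrorOrLike<uint64_t> SymAddr = lookupAddress(true);
  if (std::error_code EC = SymAddr.getError()) {  // mirrors the check added in the hunk
    std::fprintf(stderr, "fatal: %s\n", EC.message().c_str());
    return EXIT_FAILURE;
  }
  // Dereference only after the error check, as the patched code does with *SymAddr.
  std::printf("address: 0x%llx\n", (unsigned long long)*SymAddr);
  return 0;
}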
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 431010d4cbc2..83b4091d7665 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -88,9 +88,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT, if (CPUName.empty()) CPUName = "generic"; - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS); - return X; + return createX86MCSubtargetInfoImpl(TT, CPUName, ArchFS); } static MCInstrInfo *createX86MCInstrInfo() { @@ -99,17 +97,14 @@ static MCInstrInfo *createX86MCInstrInfo() { return X; } -static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { - Triple TheTriple(TT); - unsigned RA = (TheTriple.getArch() == Triple::x86_64) - ? X86::RIP // Should have dwarf #16. - : X86::EIP; // Should have dwarf #8. +static MCRegisterInfo *createX86MCRegisterInfo(const Triple &TT) { + unsigned RA = (TT.getArch() == Triple::x86_64) + ? X86::RIP // Should have dwarf #16. + : X86::EIP; // Should have dwarf #8. MCRegisterInfo *X = new MCRegisterInfo(); - InitX86MCRegisterInfo(X, RA, - X86_MC::getDwarfRegFlavour(TheTriple, false), - X86_MC::getDwarfRegFlavour(TheTriple, true), - RA); + InitX86MCRegisterInfo(X, RA, X86_MC::getDwarfRegFlavour(TT, false), + X86_MC::getDwarfRegFlavour(TT, true), RA); X86_MC::InitLLVM2SEHRegisterMapping(X); return X; } @@ -156,24 +151,23 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createX86MCCodeGenInfo(const Triple &TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); - Triple T(TT); - bool is64Bit = T.getArch() == Triple::x86_64; + bool is64Bit = TT.getArch() == Triple::x86_64; if (RM == Reloc::Default) { // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode. // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we // use static relocation model by default. - if (T.isOSDarwin()) { + if (TT.isOSDarwin()) { if (is64Bit) RM = Reloc::PIC_; else RM = Reloc::DynamicNoPIC; - } else if (T.isOSWindows() && is64Bit) + } else if (TT.isOSWindows() && is64Bit) RM = Reloc::PIC_; else RM = Reloc::Static; @@ -186,13 +180,13 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM, if (RM == Reloc::DynamicNoPIC) { if (is64Bit) RM = Reloc::PIC_; - else if (!T.isOSDarwin()) + else if (!TT.isOSDarwin()) RM = Reloc::Static; } // If we are on Darwin, disallow static relocation model in X86-64 mode, since // the Mach-O file format doesn't support it. - if (RM == Reloc::Static && T.isOSDarwin() && is64Bit) + if (RM == Reloc::Static && TT.isOSDarwin() && is64Bit) RM = Reloc::PIC_; // For static codegen, if we're not already set, use Small codegen. 
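The createX86MCCodeGenInfo hunk above selects a default relocation model from the target triple when the caller passes Reloc::Default. A condensed, illustrative restatement of that decision is sketched below; the function name, enum, and boolean parameters are inventions for this sketch rather than LLVM API.

#include <cstdio>

enum class RelocModel { Static, PIC, DynamicNoPIC };

// Mirrors the logic shown above: Darwin prefers PIC in 64-bit mode and
// dynamic-no-pic in 32-bit mode; Win64 requires RIP-relative addressing, so it
// is forced to PIC; everything else defaults to static relocations.
RelocModel defaultX86RelocModel(bool IsOSDarwin, bool IsOSWindows, bool Is64Bit) {
  if (IsOSDarwin)
    return Is64Bit ? RelocModel::PIC : RelocModel::DynamicNoPIC;
  if (IsOSWindows && Is64Bit)
    return RelocModel::PIC;
  return RelocModel::Static;
}

int main() {
  bool Win64IsPIC = defaultX86RelocModel(false, true, true) == RelocModel::PIC;
  std::printf("Win64 default is PIC: %s\n", Win64IsPIC ? "yes" : "no");
  return 0;
}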
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp index c9479b62f7b6..9bfe999424fa 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp @@ -34,7 +34,7 @@ public: if (std::error_code EC = SymNameOrErr.getError()) report_fatal_error(EC.message()); StringRef SymName = *SymNameOrErr; - uint64_t SymAddr; SymI->getAddress(SymAddr); + uint64_t SymAddr = SymI->getValue(); any_relocation_info RE = Obj->getRelocation(Rel.getRawDataRefImpl()); bool isPCRel = Obj->getAnyRelocationPCRel(RE); @@ -90,8 +90,7 @@ public: const MCExpr *LHS = MCSymbolRefExpr::create(Sym, Ctx); symbol_iterator RSymI = Rel.getSymbol(); - uint64_t RSymAddr; - RSymI->getAddress(RSymAddr); + uint64_t RSymAddr = RSymI->getValue(); ErrorOr<StringRef> RSymName = RSymI->getName(); if (std::error_code EC = RSymName.getError()) report_fatal_error(EC.message()); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index ef3318ba7580..cae865a40819 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -255,15 +255,13 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { - if (Imm & 0x88) - return; // Not a shuffle - unsigned HalfSize = VT.getVectorNumElements() / 2; for (unsigned l = 0; l != 2; ++l) { - unsigned HalfBegin = ((Imm >> (l * 4)) & 0x3) * HalfSize; + unsigned HalfMask = Imm >> (l * 4); + unsigned HalfBegin = (HalfMask & 0x3) * HalfSize; for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i) - ShuffleMask.push_back(i); + ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : (int)i); } } @@ -431,4 +429,78 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) { for (unsigned i = 1; i < NumElts; i++) Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i); } + +void DecodeEXTRQIMask(int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask) { + // Only the bottom 6 bits are valid for each immediate. + Len &= 0x3F; + Idx &= 0x3F; + + // We can only decode this bit extraction instruction as a shuffle if both the + // length and index work with whole bytes. + if (0 != (Len % 8) || 0 != (Idx % 8)) + return; + + // A length of zero is equivalent to a bit length of 64. + if (Len == 0) + Len = 64; + + // If the length + index exceeds the bottom 64 bits the result is undefined. + if ((Len + Idx) > 64) { + ShuffleMask.append(16, SM_SentinelUndef); + return; + } + + // Convert index and index to work with bytes. + Len /= 8; + Idx /= 8; + + // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes + // of the lower 64-bits. The upper 64-bits are undefined. + for (int i = 0; i != Len; ++i) + ShuffleMask.push_back(i + Idx); + for (int i = Len; i != 8; ++i) + ShuffleMask.push_back(SM_SentinelZero); + for (int i = 8; i != 16; ++i) + ShuffleMask.push_back(SM_SentinelUndef); +} + +void DecodeINSERTQIMask(int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask) { + // Only the bottom 6 bits are valid for each immediate. + Len &= 0x3F; + Idx &= 0x3F; + + // We can only decode this bit insertion instruction as a shuffle if both the + // length and index work with whole bytes. + if (0 != (Len % 8) || 0 != (Idx % 8)) + return; + + // A length of zero is equivalent to a bit length of 64. 
+ if (Len == 0) + Len = 64; + + // If the length + index exceeds the bottom 64 bits the result is undefined. + if ((Len + Idx) > 64) { + ShuffleMask.append(16, SM_SentinelUndef); + return; + } + + // Convert index and index to work with bytes. + Len /= 8; + Idx /= 8; + + // INSERTQ: Extract lowest Len bytes from lower half of second source and + // insert over first source starting at Idx byte. The upper 64-bits are + // undefined. + for (int i = 0; i != Idx; ++i) + ShuffleMask.push_back(i); + for (int i = 0; i != Len; ++i) + ShuffleMask.push_back(i + 16); + for (int i = Idx + Len; i != 8; ++i) + ShuffleMask.push_back(i); + for (int i = 8; i != 16; ++i) + ShuffleMask.push_back(SM_SentinelUndef); +} + } // llvm namespace diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 14b69434806e..3d10d18e860e 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -100,6 +100,14 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); /// \brief Decode a scalar float move instruction as a shuffle mask. void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &ShuffleMask); + +/// \brief Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask. +void DecodeEXTRQIMask(int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask); + +/// \brief Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask. +void DecodeINSERTQIMask(int Len, int Idx, + SmallVectorImpl<int> &ShuffleMask); } // llvm namespace #endif diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 02645460b6a2..b4319c8bb04f 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -317,7 +317,7 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, } bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { - EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); + EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true); if (evt == MVT::Other || !evt.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; @@ -608,7 +608,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { // Prepare for inserting code in the local-value area. SavePoint SaveInsertPt = enterLocalValueArea(); - if (TLI.getPointerTy() == MVT::i64) { + if (TLI.getPointerTy(DL) == MVT::i64) { Opc = X86::MOV64rm; RC = &X86::GR64RegClass; @@ -690,13 +690,14 @@ redo_gep: case Instruction::IntToPtr: // Look past no-op inttoptrs. - if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return X86SelectAddress(U->getOperand(0), AM); break; case Instruction::PtrToInt: // Look past no-op ptrtoints. - if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return X86SelectAddress(U->getOperand(0), AM); break; @@ -866,14 +867,14 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { case Instruction::IntToPtr: // Look past no-op inttoptrs if its operand is in the same BB. if (InMBB && - TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) + TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) return X86SelectCallAddress(U->getOperand(0), AM); break; case Instruction::PtrToInt: // Look past no-op ptrtoints if its operand is in the same BB. 
- if (InMBB && - TLI.getValueType(U->getType()) == TLI.getPointerTy()) + if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) return X86SelectCallAddress(U->getOperand(0), AM); break; } @@ -1000,7 +1001,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { SmallVector<ISD::OutputArg, 4> Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; @@ -1031,7 +1032,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { return false; unsigned SrcReg = Reg + VA.getValNo(); - EVT SrcVT = TLI.getValueType(RV->getType()); + EVT SrcVT = TLI.getValueType(DL, RV->getType()); EVT DstVT = VA.getValVT(); // Special handling for extended integers. if (SrcVT != DstVT) { @@ -1300,7 +1301,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { } bool X86FastISel::X86SelectZExt(const Instruction *I) { - EVT DstVT = TLI.getValueType(I->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); if (!TLI.isTypeLegal(DstVT)) return false; @@ -1309,7 +1310,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) { return false; // Handle zero-extension from i1 to i8, which is common. - MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType()); + MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType()); if (SrcVT.SimpleTy == MVT::i1) { // Set the high bits to zero. ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); @@ -1362,7 +1363,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { X86::CondCode CC; if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { if (CI->hasOneUse() && CI->getParent() == I->getParent()) { - EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); + EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType()); // Try to optimize or fold the cmp. CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); @@ -1802,7 +1803,7 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { if (NeedSwap) std::swap(CmpLHS, CmpRHS); - EVT CmpVT = TLI.getValueType(CmpLHS->getType()); + EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType()); // Emit a compare of the LHS and RHS, setting the flags. if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc())) return false; @@ -2004,7 +2005,7 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { if (NeedSwap) std::swap(CmpLHS, CmpRHS); - EVT CmpVT = TLI.getValueType(CmpLHS->getType()); + EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType()); if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc())) return false; } else { @@ -2166,8 +2167,8 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { } bool X86FastISel::X86SelectTrunc(const Instruction *I) { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); // This code only handles truncation to byte. if (DstVT != MVT::i8 && DstVT != MVT::i1) @@ -2416,7 +2417,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { } case Intrinsic::stackprotector: { // Emit code to store the stack guard onto the stack. - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI.getPointerTy(DL); const Value *Op1 = II->getArgOperand(0); // The guard's value. 
const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1)); @@ -2735,7 +2736,7 @@ bool X86FastISel::fastLowerArguments() { if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; - EVT ArgVT = TLI.getValueType(ArgTy); + EVT ArgVT = TLI.getValueType(DL, ArgTy); if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { default: return false; @@ -2772,7 +2773,7 @@ bool X86FastISel::fastLowerArguments() { unsigned GPRIdx = 0; unsigned FPRIdx = 0; for (auto const &Arg : F->args()) { - MVT VT = TLI.getSimpleValueType(Arg.getType()); + MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned SrcReg; switch (VT.SimpleTy) { @@ -3108,7 +3109,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && - (GV->isDeclaration() || GV->isWeakForLinker()) && + !GV->isStrongDefinitionForLinker() && (!Subtarget->getTargetTriple().isMacOSX() || Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, @@ -3240,8 +3241,8 @@ X86FastISel::fastSelectInstruction(const Instruction *I) { return X86SelectSIToFP(I); case Instruction::IntToPtr: // Deliberate fall-through. case Instruction::PtrToInt: { - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); if (DstVT.bitsGT(SrcVT)) return X86SelectZExt(I); if (DstVT.bitsLT(SrcVT)) @@ -3384,7 +3385,7 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) { addDirectMem(MIB, AddrReg); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, - TM.getDataLayout()->getPointerSize(), Align); + DL.getPointerSize(), Align); MIB->addMemOperand(*FuncInfo.MF, MMO); return ResultReg; } @@ -3411,17 +3412,17 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); if (TM.getRelocationModel() == Reloc::Static && - TLI.getPointerTy() == MVT::i64) { + TLI.getPointerTy(DL) == MVT::i64) { // The displacement code could be more than 32 bits away so we need to use // an instruction with a 64 bit immediate BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri), ResultReg) .addGlobalAddress(GV); } else { - unsigned Opc = TLI.getPointerTy() == MVT::i32 - ? (Subtarget->isTarget64BitILP32() - ? X86::LEA64_32r : X86::LEA32r) - : X86::LEA64r; + unsigned Opc = + TLI.getPointerTy(DL) == MVT::i32 + ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r) + : X86::LEA64r; addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); } @@ -3431,7 +3432,7 @@ unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) { } unsigned X86FastISel::fastMaterializeConstant(const Constant *C) { - EVT CEVT = TLI.getValueType(C->getType(), true); + EVT CEVT = TLI.getValueType(DL, C->getType(), true); // Only handle simple types. if (!CEVT.isSimple()) @@ -3463,11 +3464,11 @@ unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) { X86AddressMode AM; if (!X86SelectAddress(C, AM)) return 0; - unsigned Opc = TLI.getPointerTy() == MVT::i32 - ? (Subtarget->isTarget64BitILP32() - ? 
X86::LEA64_32r : X86::LEA32r) - : X86::LEA64r; - const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned Opc = + TLI.getPointerTy(DL) == MVT::i32 + ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r) + : X86::LEA64r; + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); unsigned ResultReg = createResultReg(RC); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 40b9c8a863a3..36a8cdbab55b 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -301,8 +301,9 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { bool FPIsUsed = false; static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!"); + const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned i = 0; i <= 6; ++i) - if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { + if (!MRI.reg_nodbg_empty(X86::FP0 + i)) { FPIsUsed = true; break; } diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 85c5b6499131..2a35c4cf31f3 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -90,7 +90,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { return (MF.getTarget().Options.DisableFramePointerElim(MF) || TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() || + MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() || MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || MMI.callsUnwindInit() || MMI.callsEHReturn() || MFI->hasStackMap() || MFI->hasPatchPoint()); @@ -967,13 +967,26 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); if (X86FI->getRestoreBasePointer()) { - // Stash value of base pointer. Saving RSP instead of EBP shortens dependence chain. + // Stash value of base pointer. Saving RSP instead of EBP shortens + // dependence chain. Used by SjLj EH. unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true, X86FI->getRestoreBasePointerOffset()) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); } + + if (X86FI->getHasSEHFramePtrSave()) { + // Stash the value of the frame pointer relative to the base pointer for + // Win32 EH. This supports Win32 EH, which does the inverse of the above: + // it recovers the frame pointer from the base pointer rather than the + // other way around. + unsigned Opm = Uses64BitFramePtr ? 
X86::MOV64mr : X86::MOV32mr; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true, + getFrameIndexOffset(MF, X86FI->getSEHFramePtrSaveIndex())) + .addReg(FramePtr) + .setMIFlag(MachineInstr::FrameSetup); + } } if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { @@ -1412,9 +1425,11 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } -void -X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + MachineFrameInfo *MFI = MF.getFrameInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); @@ -1436,7 +1451,7 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Spill the BasePtr if it's used. if (TRI->hasBasePointer(MF)) - MF.getRegInfo().setPhysRegUsed(TRI->getBaseRegister()); + SavedRegs.set(TRI->getBaseRegister()); } static bool @@ -1667,8 +1682,6 @@ void X86FrameLowering::adjustForSegmentedStacks( .addImm(StackSize); BuildMI(allocMBB, DL, TII.get(MOVri), Reg11) .addImm(X86FI->getArgumentStackSize()); - MF.getRegInfo().setPhysRegUsed(Reg10); - MF.getRegInfo().setPhysRegUsed(Reg11); } else { BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) .addImm(X86FI->getArgumentStackSize()); diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index c274c8820149..495cfcd1c3f7 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -68,8 +68,8 @@ public: void adjustForHiPEPrologue(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; bool assignCalleeSavedSpillSlots(MachineFunction &MF, diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 6b23e62a2d35..d5351d25d6ed 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -246,8 +246,9 @@ namespace { SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) - ? CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, - TLI->getPointerTy()) + ? 
CurDAG->getTargetFrameIndex( + AM.Base_FrameIndex, + TLI->getPointerTy(CurDAG->getDataLayout())) : AM.Base_Reg; Scale = getI8Imm(AM.Scale, DL); Index = AM.IndexReg; @@ -581,11 +582,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() { void X86DAGToDAGISel::EmitSpecialCodeForMain() { if (Subtarget->isTargetCygMing()) { TargetLowering::ArgListTy Args; + auto &DL = CurDAG->getDataLayout(); TargetLowering::CallLoweringInfo CLI(*CurDAG); CLI.setChain(CurDAG->getRoot()) .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()), - CurDAG->getExternalSymbol("__main", TLI->getPointerTy()), + CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)), std::move(Args), 0); const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); @@ -1025,7 +1027,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, switch (N.getOpcode()) { default: break; - case ISD::FRAME_ALLOC_RECOVER: { + case ISD::LOCAL_RECOVER: { if (!AM.hasSymbolicDisplacement() && AM.Disp == 0) if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) { // Use the symbol and don't prefix it. @@ -1638,7 +1640,8 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, /// SDNode *X86DAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy()).getNode(); + auto &DL = MF->getDataLayout(); + return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode(); } /// Atomic opcode table diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b16bd18aefaa..6e22ab30057c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -76,7 +76,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, : TargetLowering(TM), Subtarget(&STI) { X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); - TD = getDataLayout(); + TD = TM.getDataLayout(); // Set up the TargetLowering object. static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; @@ -505,7 +505,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(), Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(*TD), Custom); // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering. 
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); @@ -825,6 +825,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FNEG, MVT::v2f64, Custom); setOperationAction(ISD::FABS, MVT::v2f64, Custom); + setOperationAction(ISD::SMAX, MVT::v8i16, Legal); + setOperationAction(ISD::UMAX, MVT::v16i8, Legal); + setOperationAction(ISD::SMIN, MVT::v8i16, Legal); + setOperationAction(ISD::UMIN, MVT::v16i8, Legal); + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); setOperationAction(ISD::SETCC, MVT::v16i8, Custom); setOperationAction(ISD::SETCC, MVT::v8i16, Custom); @@ -944,6 +949,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); } + setOperationAction(ISD::SMAX, MVT::v16i8, Legal); + setOperationAction(ISD::SMAX, MVT::v4i32, Legal); + setOperationAction(ISD::UMAX, MVT::v8i16, Legal); + setOperationAction(ISD::UMAX, MVT::v4i32, Legal); + setOperationAction(ISD::SMIN, MVT::v16i8, Legal); + setOperationAction(ISD::SMIN, MVT::v4i32, Legal); + setOperationAction(ISD::UMIN, MVT::v8i16, Legal); + setOperationAction(ISD::UMIN, MVT::v4i32, Legal); + // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); @@ -1018,6 +1032,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SHL, MVT::v2i64, Custom); setOperationAction(ISD::SHL, MVT::v4i32, Custom); + setOperationAction(ISD::SRA, MVT::v2i64, Custom); setOperationAction(ISD::SRA, MVT::v4i32, Custom); } @@ -1141,6 +1156,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MULHU, MVT::v16i16, Legal); setOperationAction(ISD::MULHS, MVT::v16i16, Legal); + setOperationAction(ISD::SMAX, MVT::v32i8, Legal); + setOperationAction(ISD::SMAX, MVT::v16i16, Legal); + setOperationAction(ISD::SMAX, MVT::v8i32, Legal); + setOperationAction(ISD::UMAX, MVT::v32i8, Legal); + setOperationAction(ISD::UMAX, MVT::v16i16, Legal); + setOperationAction(ISD::UMAX, MVT::v8i32, Legal); + setOperationAction(ISD::SMIN, MVT::v32i8, Legal); + setOperationAction(ISD::SMIN, MVT::v16i16, Legal); + setOperationAction(ISD::SMIN, MVT::v8i32, Legal); + setOperationAction(ISD::UMIN, MVT::v32i8, Legal); + setOperationAction(ISD::UMIN, MVT::v16i16, Legal); + setOperationAction(ISD::UMIN, MVT::v8i32, Legal); + // The custom lowering for UINT_TO_FP for v8i32 becomes interesting // when we have a 256bit-wide blend with immediate. setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); @@ -1184,6 +1212,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SHL, MVT::v4i64, Custom); setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SRA, MVT::v4i64, Custom); setOperationAction(ISD::SRA, MVT::v8i32, Custom); // Custom lower several nodes for 256-bit types. 
@@ -1376,6 +1405,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v16i1, Custom); setOperationAction(ISD::SELECT, MVT::v8i1, Custom); + setOperationAction(ISD::SMAX, MVT::v16i32, Legal); + setOperationAction(ISD::SMAX, MVT::v8i64, Legal); + setOperationAction(ISD::UMAX, MVT::v16i32, Legal); + setOperationAction(ISD::UMAX, MVT::v8i64, Legal); + setOperationAction(ISD::SMIN, MVT::v16i32, Legal); + setOperationAction(ISD::SMIN, MVT::v8i64, Legal); + setOperationAction(ISD::UMIN, MVT::v16i32, Legal); + setOperationAction(ISD::UMIN, MVT::v8i64, Legal); + setOperationAction(ISD::ADD, MVT::v8i64, Legal); setOperationAction(ISD::ADD, MVT::v16i32, Legal); @@ -1473,6 +1511,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SUB, MVT::v32i16, Legal); setOperationAction(ISD::SUB, MVT::v64i8, Legal); setOperationAction(ISD::MUL, MVT::v32i16, Legal); + setOperationAction(ISD::MULHS, MVT::v32i16, Legal); + setOperationAction(ISD::MULHU, MVT::v32i16, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); @@ -1492,6 +1532,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom); + setOperationAction(ISD::SMAX, MVT::v64i8, Legal); + setOperationAction(ISD::SMAX, MVT::v32i16, Legal); + setOperationAction(ISD::UMAX, MVT::v64i8, Legal); + setOperationAction(ISD::UMAX, MVT::v32i16, Legal); + setOperationAction(ISD::SMIN, MVT::v64i8, Legal); + setOperationAction(ISD::SMIN, MVT::v32i16, Legal); + setOperationAction(ISD::UMIN, MVT::v64i8, Legal); + setOperationAction(ISD::UMIN, MVT::v32i16, Legal); + for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { const MVT VT = (MVT::SimpleValueType)i; @@ -1531,6 +1580,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::XOR, MVT::v4i32, Legal); setOperationAction(ISD::SRA, MVT::v2i64, Custom); setOperationAction(ISD::SRA, MVT::v4i64, Custom); + + setOperationAction(ISD::SMAX, MVT::v2i64, Legal); + setOperationAction(ISD::SMAX, MVT::v4i64, Legal); + setOperationAction(ISD::UMAX, MVT::v2i64, Legal); + setOperationAction(ISD::UMAX, MVT::v4i64, Legal); + setOperationAction(ISD::SMIN, MVT::v2i64, Legal); + setOperationAction(ISD::SMIN, MVT::v4i64, Legal); + setOperationAction(ISD::UMIN, MVT::v2i64, Legal); + setOperationAction(ISD::UMIN, MVT::v4i64, Legal); } // We want to custom lower some of our intrinsics. @@ -1611,6 +1669,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::SINT_TO_FP); + setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::BUILD_VECTOR); @@ -1652,7 +1711,8 @@ X86TargetLowering::getPreferredVectorAction(EVT VT) const { return TargetLoweringBase::getPreferredVectorAction(VT); } -EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { +EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const { if (!VT.isVector()) return Subtarget->hasAVX512() ? MVT::i1: MVT::i8; @@ -1724,10 +1784,11 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { /// function arguments in the caller parameter area. 
For X86, aggregates /// that contain SSE vectors are placed at 16-byte boundaries while the rest /// are at 4-byte boundaries. -unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { +unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const { if (Subtarget->is64Bit()) { // Max of 8 and alignment of type. - unsigned TyAlign = TD->getABITypeAlignment(Ty); + unsigned TyAlign = DL.getABITypeAlignment(Ty); if (TyAlign > 8) return TyAlign; return 8; @@ -1840,7 +1901,8 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, if (!Subtarget->is64Bit()) // This doesn't have SDLoc associated with it, but is not really the // same as a Register. - return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy()); + return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), + getPointerTy(DAG.getDataLayout())); return Table; } @@ -2032,7 +2094,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, // false, then an sret argument may be implicitly inserted in the SelDAG. In // either case FuncInfo->setSRetReturnReg() will have been called. if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) { - SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, getPointerTy()); + SDValue Val = DAG.getCopyFromReg(Chain, dl, SRetReg, + getPointerTy(MF.getDataLayout())); unsigned RetValReg = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ? @@ -2041,7 +2104,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); // RAX/EAX now acts like a return value. - RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy())); + RetOps.push_back( + DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); } RetOps[0] = Chain; // Update chain. @@ -2288,11 +2352,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, unsigned Bytes = Flags.getByValSize(); if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable); - return DAG.getFrameIndex(FI, getPointerTy()); + return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), isImmutable); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, false, 0); @@ -2471,7 +2535,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (Ins[i].Flags.isSRet()) { unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { - MVT PtrTy = getPointerTy(); + MVT PtrTy = getPointerTy(DAG.getDataLayout()); Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); FuncInfo->setSRetReturnReg(Reg); } @@ -2499,7 +2563,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MachineModuleInfo &MMI = MF.getMMI(); const Function *WinEHParent = nullptr; - if (IsWin64 && MMI.hasWinEHFuncInfo(Fn)) + if (MMI.hasWinEHFuncInfo(Fn)) WinEHParent = MMI.getWinEHParent(Fn); bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn; bool IsWinEHParent = WinEHParent && WinEHParent == Fn; @@ -2561,11 +2625,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Store the integer parameter registers. 
SmallVector<SDValue, 8> MemOps; SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), - getPointerTy()); + getPointerTy(DAG.getDataLayout())); unsigned Offset = FuncInfo->getVarArgsGPOffset(); for (SDValue Val : LiveGPRs) { - SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, - DAG.getIntPtrConstant(Offset, dl)); + SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + RSFIN, DAG.getIntPtrConstant(Offset, dl)); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo::getFixedStack( @@ -2592,7 +2656,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); - } else if (IsWinEHOutlined) { + } else if (IsWin64 && IsWinEHOutlined) { // Get to the caller-allocated home save location. Add 8 to account // for the return address. int HomeOffset = TFI.getOffsetOfLocalArea() + 8; @@ -2605,8 +2669,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Store the second integer parameter (rdx) into rsp+16 relative to the // stack pointer at the entry of the function. - SDValue RSFIN = - DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); + SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), + getPointerTy(DAG.getDataLayout())); unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64); Chain = DAG.getStore( @@ -2680,14 +2744,21 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setArgumentStackSize(StackSize); if (IsWinEHParent) { - int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); - SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); - MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; - SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64); - Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, - MachinePointerInfo::getFixedStack(UnwindHelpFI), - /*isVolatile=*/true, - /*isNonTemporal=*/false, /*Alignment=*/0); + if (Is64Bit) { + int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); + SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); + MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; + SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64); + Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, + MachinePointerInfo::getFixedStack(UnwindHelpFI), + /*isVolatile=*/true, + /*isNonTemporal=*/false, /*Alignment=*/0); + } else { + // Functions using Win32 EH are considered to have opaque SP adjustments + // to force local variables to be addressed from the frame or base + // pointers. + MFI->setHasOpaqueSPAdjustment(true); + } } return Chain; @@ -2701,7 +2772,8 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); - PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + StackPtr, PtrOff); if (Flags.isByVal()) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); @@ -2718,7 +2790,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, bool IsTailCall, bool Is64Bit, int FPDiff, SDLoc dl) const { // Adjust the Return address stack slot. - EVT VT = getPointerTy(); + EVT VT = getPointerTy(DAG.getDataLayout()); OutRetAddr = getReturnAddressFrameIndex(DAG); // Load the "old" Return address. 
@@ -2942,7 +3014,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(VA.isMemLoc()); if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), - getPointerTy()); + getPointerTy(DAG.getDataLayout())); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); } @@ -2955,8 +3027,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (!isTailCall) { - RegsToPass.push_back(std::make_pair(unsigned(X86::EBX), - DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), getPointerTy()))); + RegsToPass.push_back(std::make_pair( + unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), + getPointerTy(DAG.getDataLayout())))); } else { // If we are tail calling and generating PIC/GOT style code load the // address of the callee into ECX. The value in ecx is used as target of @@ -3036,16 +3109,16 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); - FIN = DAG.getFrameIndex(FI, getPointerTy()); + FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); if (Flags.isByVal()) { // Copy relative to framepointer. SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); if (!StackPtr.getNode()) - StackPtr = DAG.getCopyFromReg(Chain, dl, - RegInfo->getStackRegister(), - getPointerTy()); - Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source); + StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), + getPointerTy(DAG.getDataLayout())); + Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + StackPtr, Source); MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, ArgChain, @@ -3064,8 +3137,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, - getPointerTy(), RegInfo->getSlotSize(), - FPDiff, dl); + getPointerTy(DAG.getDataLayout()), + RegInfo->getSlotSize(), FPDiff, dl); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -3106,7 +3179,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && - (GV->isDeclaration() || GV->isWeakForLinker()) && + !GV->isStrongDefinitionForLinker() && (!Subtarget->getTargetTriple().isMacOSX() || Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, @@ -3123,17 +3196,18 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ExtraLoad = true; } - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), - G->getOffset(), OpFlags); + Callee = DAG.getTargetGlobalAddress( + GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags); // Add a wrapper if needed. if (WrapperKind != ISD::DELETED_NODE) - Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee); + Callee = DAG.getNode(X86ISD::WrapperRIP, dl, + getPointerTy(DAG.getDataLayout()), Callee); // Add extra indirection if needed. 
if (ExtraLoad) - Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(), - false, false, false, 0); + Callee = DAG.getLoad( + getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee, + MachinePointerInfo::getGOT(), false, false, false, 0); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { unsigned char OpFlags = 0; @@ -3152,8 +3226,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, OpFlags = X86II::MO_DARWIN_STUB; } - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), - OpFlags); + Callee = DAG.getTargetExternalSymbol( + S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags); } else if (Subtarget->isTarget64BitILP32() && Callee->getValueType(0) == MVT::i32) { // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI @@ -3184,9 +3258,24 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); + const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); + + // If this is an invoke in a 32-bit function using an MSVC personality, assume + // the function clobbers all registers. If an exception is thrown, the runtime + // will not restore CSRs. + // FIXME: Model this more precisely so that we can register allocate across + // the normal edge and spill and fill across the exceptional edge. + if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) { + const Function *CallerFn = MF.getFunction(); + EHPersonality Pers = + CallerFn->hasPersonalityFn() + ? classifyEHPersonality(CallerFn->getPersonalityFn()) + : EHPersonality::Unknown; + if (isMSVCEHPersonality(Pers)) + Mask = RegInfo->getNoPreservedMask(); + } + Ops.push_back(DAG.getRegisterMask(Mask)); if (InFlag.getNode()) @@ -3650,7 +3739,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { FuncInfo->setRAIndex(ReturnAddrIndex); } - return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); + return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout())); } bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, @@ -3881,6 +3970,15 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const { return Subtarget->hasLZCNT(); } +/// isUndefInRange - Return true if every element in Mask, beginning +/// from position Pos and ending in Pos+Size is undef. +static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) { + for (unsigned i = Pos, e = Pos + Size; i != e; ++i) + if (0 <= Mask[i]) + return false; + return true; +} + /// isUndefOrInRange - Return true if Val is undef or if its value falls within /// the specified range (L, H]. static bool isUndefOrInRange(int Val, int Low, int Hi) { @@ -4322,6 +4420,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, /// IsUnary to true if only uses one source. Note that this will set IsUnary for /// shuffles which use a single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. +/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero. 
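// Illustrative sketch (not part of the diff): the behaviour of the new
// isUndefInRange helper above, restated over a plain array so it can be read
// in isolation. Negative shuffle-mask entries denote undef lanes.
static bool isUndefInRangeSketch(const int *Mask, unsigned Pos, unsigned Size) {
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
    if (Mask[i] >= 0)
      return false;
  return true;
}
// Example: for a v8i16 shuffle that only defines its low half,
//   int M[8] = {0, 1, 2, 3, -1, -1, -1, -1};
//   isUndefInRangeSketch(M, 4, 4) == true, so the upper half can be ignored
//   when matching the SSE4A EXTRQ/INSERTQ patterns introduced later in this patch.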
static bool getTargetShuffleMask(SDNode *N, MVT VT, SmallVectorImpl<int> &Mask, bool &IsUnary) { unsigned NumElems = VT.getVectorNumElements(); @@ -4451,6 +4550,10 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); if (Mask.empty()) return false; + // Mask only contains negative index if an element is zero. + if (std::any_of(Mask.begin(), Mask.end(), + [](int M){ return M == SM_SentinelZero; })) + return false; break; case X86ISD::MOVSLDUP: DecodeMOVSLDUPMask(VT, Mask); @@ -4764,7 +4867,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, MVT ShVT = MVT::v2i64; unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; SrcOp = DAG.getBitcast(ShVT, SrcOp); - MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(SrcOp.getValueType()); + MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT); assert(NumBits % 8 == 0 && "Only support byte sized shifts"); SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy); return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); @@ -5082,7 +5185,8 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, assert(C && "Invalid constant type"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy()); + SDValue CP = + DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment(); Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), @@ -6857,6 +6961,136 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1, return SDValue(); } +/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. +static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef<int> Mask, + SelectionDAG &DAG) { + SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); + assert(!Zeroable.all() && "Fully zeroable shuffle mask"); + + int Size = Mask.size(); + int HalfSize = Size / 2; + assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); + + // Upper half must be undefined. + if (!isUndefInRange(Mask, HalfSize, HalfSize)) + return SDValue(); + + // EXTRQ: Extract Len elements from lower half of source, starting at Idx. + // Remainder of lower half result is zero and upper half is all undef. + auto LowerAsEXTRQ = [&]() { + // Determine the extraction length from the part of the + // lower half that isn't zeroable. + int Len = HalfSize; + for (; Len >= 0; --Len) + if (!Zeroable[Len - 1]) + break; + assert(Len > 0 && "Zeroable shuffle mask"); + + // Attempt to match first Len sequential elements from the lower half. + SDValue Src; + int Idx = -1; + for (int i = 0; i != Len; ++i) { + int M = Mask[i]; + if (M < 0) + continue; + SDValue &V = (M < Size ? V1 : V2); + M = M % Size; + + // All mask elements must be in the lower half. 
+ if (M > HalfSize) + return SDValue(); + + if (Idx < 0 || (Src == V && Idx == (M - i))) { + Src = V; + Idx = M - i; + continue; + } + return SDValue(); + } + + if (Idx < 0) + return SDValue(); + + assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); + int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; + int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; + return DAG.getNode(X86ISD::EXTRQI, DL, VT, Src, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + }; + + if (SDValue ExtrQ = LowerAsEXTRQ()) + return ExtrQ; + + // INSERTQ: Extract lowest Len elements from lower half of second source and + // insert over first source, starting at Idx. + // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } + auto LowerAsInsertQ = [&]() { + for (int Idx = 0; Idx != HalfSize; ++Idx) { + SDValue Base; + + // Attempt to match first source from mask before insertion point. + if (isUndefInRange(Mask, 0, Idx)) { + /* EMPTY */ + } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) { + Base = V1; + } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) { + Base = V2; + } else { + continue; + } + + // Extend the extraction length looking to match both the insertion of + // the second source and the remaining elements of the first. + for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) { + SDValue Insert; + int Len = Hi - Idx; + + // Match insertion. + if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) { + Insert = V1; + } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) { + Insert = V2; + } else { + continue; + } + + // Match the remaining elements of the lower half. + if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { + /* EMPTY */ + } else if ((!Base || (Base == V1)) && + isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { + Base = V1; + } else if ((!Base || (Base == V2)) && + isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, + Size + Hi)) { + Base = V2; + } else { + continue; + } + + // We may not have a base (first source) - this can safely be undefined. + if (!Base) + Base = DAG.getUNDEF(VT); + + int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; + int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; + return DAG.getNode(X86ISD::INSERTQI, DL, VT, Base, Insert, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + } + } + + return SDValue(); + }; + + if (SDValue InsertQ = LowerAsInsertQ()) + return InsertQ; + + return SDValue(); +} + /// \brief Lower a vector shuffle as a zero or any extension. /// /// Given a specific number of elements, element bit width, and extension @@ -6864,7 +7098,7 @@ static SDValue lowerVectorShuffleAsShift(SDLoc DL, MVT VT, SDValue V1, /// features of the subtarget. static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV, - const X86Subtarget *Subtarget, SelectionDAG &DAG) { + ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Scale > 1 && "Need a scale to extend."); int NumElements = VT.getVectorNumElements(); int EltBits = VT.getScalarSizeInBits(); @@ -6901,6 +7135,28 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG))); } + // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes + // to 64-bits. 
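// Illustrative sketch (not part of the diff): how the EXTRQI/INSERTQI immediates
// above are formed. EXTRQ/INSERTQ describe a bit field inside the low 64 bits of
// the source by a 6-bit length and a 6-bit index, so the element counts are
// scaled by the element width and masked to 6 bits (the ISA documents a length
// of 0 as meaning a full 64 bits).
struct SSE4AFieldSketch { int BitLen, BitIdx; };
static SSE4AFieldSketch encodeSSE4AFieldSketch(int Len, int Idx, int EltBits) {
  // e.g. v8i16 (EltBits = 16), Len = 3, Idx = 1  ->  BitLen = 48, BitIdx = 16.
  return { (Len * EltBits) & 0x3f, (Idx * EltBits) & 0x3f };
}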
+ if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) { + assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!"); + assert(VT.getSizeInBits() == 128 && "Unexpected vector width!"); + + SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, + DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, + DAG.getConstant(EltBits, DL, MVT::i8), + DAG.getConstant(0, DL, MVT::i8))); + if (isUndefInRange(Mask, NumElements/2, NumElements/2)) + return DAG.getNode(ISD::BITCAST, DL, VT, Lo); + + SDValue Hi = + DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, + DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, + DAG.getConstant(EltBits, DL, MVT::i8), + DAG.getConstant(EltBits, DL, MVT::i8))); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi)); + } + // If this would require more than 2 unpack instructions to expand, use // pshufb when available. We can only use more than 2 unpack instructions // when zero extending i8 elements which also makes it easier to use pshufb. @@ -6991,7 +7247,7 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( return SDValue(); return lowerVectorShuffleAsSpecificZeroOrAnyExtend( - DL, VT, Scale, AnyExt, InputV, Subtarget, DAG); + DL, VT, Scale, AnyExt, InputV, Mask, Subtarget, DAG); }; // The widest scale possible for extending is to a 64-bit integer. @@ -7166,9 +7422,9 @@ static SDValue lowerVectorShuffleAsElementInsertion( V2 = DAG.getBitcast(MVT::v2i64, V2); V2 = DAG.getNode( X86ISD::VSHLDQ, DL, MVT::v2i64, V2, - DAG.getConstant( - V2Index * EltVT.getSizeInBits()/8, DL, - DAG.getTargetLoweringInfo().getScalarShiftAmountTy(MVT::v2i64))); + DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, + DAG.getTargetLoweringInfo().getScalarShiftAmountTy( + DAG.getDataLayout(), VT))); V2 = DAG.getBitcast(VT, V2); } } @@ -8518,6 +8774,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, DAG)) return Shift; + // See if we can use SSE4A Extraction / Insertion. + if (Subtarget->hasSSE4A()) + if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG)) + return V; + // There are special ways we can lower some single-element blends. if (NumV2Inputs == 1) if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2, @@ -8670,6 +8931,11 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return ZExt; + // See if we can use SSE4A Extraction / Insertion. 
+ if (Subtarget->hasSSE4A()) + if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG)) + return V; + int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 16; }); @@ -10613,12 +10879,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MaskEltVT.getSizeInBits()); Idx = DAG.getZExtOrTrunc(Idx, dl, MaskEltVT); + auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Mask = DAG.getNode(X86ISD::VINSERT, dl, MaskVT, - getZeroVector(MaskVT, Subtarget, DAG, dl), - Idx, DAG.getConstant(0, dl, getPointerTy())); + getZeroVector(MaskVT, Subtarget, DAG, dl), Idx, + DAG.getConstant(0, dl, PtrVT)); SDValue Perm = DAG.getNode(X86ISD::VPERMV, dl, VecVT, Mask, Vec); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), - Perm, DAG.getConstant(0, dl, getPointerTy())); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Perm, + DAG.getConstant(0, dl, PtrVT)); } return SDValue(); } @@ -11009,17 +11276,16 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { else if (Subtarget->isPICStyleStubPIC()) OpFlag = X86II::MO_PIC_BASE_OFFSET; - SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(), - CP->getAlignment(), - CP->getOffset(), OpFlag); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetConstantPool( + CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), OpFlag); SDLoc DL(CP); - Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + Result = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (OpFlag) { - Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), getPointerTy()), - Result); + Result = + DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); } return Result; @@ -11042,17 +11308,16 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { else if (Subtarget->isPICStyleStubPIC()) OpFlag = X86II::MO_PIC_BASE_OFFSET; - SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(), - OpFlag); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); SDLoc DL(JT); - Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + Result = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (OpFlag) - Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), getPointerTy()), - Result); + Result = + DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); return Result; } @@ -11080,24 +11345,24 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { OpFlag = X86II::MO_DARWIN_NONLAZY; } - SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag); SDLoc DL(Op); - Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + Result = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC, the address is actually $g + Offset. 
if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ && !Subtarget->is64Bit()) { - Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), getPointerTy()), - Result); + Result = + DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); } // For symbols that require a load from a stub to get the address, emit the // load. if (isGlobalStubReference(OpFlag)) - Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result, + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(), false, false, false, 0); return Result; @@ -11112,20 +11377,19 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset(); SDLoc dl(Op); - SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(), Offset, - OpFlags); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags); if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) - Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result); + Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result); else - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); + Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (isGlobalRelativeToPICBase(OpFlags)) { - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()), - Result); + Result = DAG.getNode(ISD::ADD, dl, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); } return Result; @@ -11139,40 +11403,40 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, DAG.getTarget()); CodeModel::Model M = DAG.getTarget().getCodeModel(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; if (OpFlags == X86II::MO_NO_FLAG && X86::isOffsetSuitableForCodeModel(Offset, M)) { // A direct static reference to a global. - Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); + Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); Offset = 0; } else { - Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); + Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags); } if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) - Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result); + Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result); else - Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); + Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result); // With PIC, the address is actually $g + Offset. if (isGlobalRelativeToPICBase(OpFlags)) { - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()), - Result); + Result = DAG.getNode(ISD::ADD, dl, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); } // For globals that require a load from a stub to get the address, emit the // load. 
if (isGlobalStubReference(OpFlags)) - Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, + Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(), false, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. if (Offset != 0) - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result, - DAG.getConstant(Offset, dl, getPointerTy())); + Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, + DAG.getConstant(Offset, dl, PtrVT)); return Result; } @@ -11336,22 +11600,23 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GA->getGlobal(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Subtarget->isTargetELF()) { TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: if (Subtarget->is64Bit()) - return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); - return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); + return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); + return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); case TLSModel::LocalDynamic: - return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(), + return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget->is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: - return LowerToTLSExecModel( - GA, DAG, getPointerTy(), model, Subtarget->is64Bit(), - DAG.getTarget().getRelocationModel() == Reloc::PIC_); + return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget->is64Bit(), + DAG.getTarget().getRelocationModel() == + Reloc::PIC_); } llvm_unreachable("Unknown TLS model."); } @@ -11374,13 +11639,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, GA->getValueType(0), GA->getOffset(), OpFlag); - SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result); // With PIC32, the address is actually $g + Offset. if (PIC32) - Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(), - DAG.getNode(X86ISD::GlobalBaseReg, - SDLoc(), getPointerTy()), + Offset = DAG.getNode(ISD::ADD, DL, PtrVT, + DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Offset); // Lowering the machine isd will make sure everything is in the right @@ -11397,8 +11661,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // And our return value (tls address) is in the standard call return value // location. unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; - return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(), - Chain.getValue(1)); + return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1)); } if (Subtarget->isTargetKnownWindowsMSVC() || @@ -11426,50 +11689,50 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { : Type::getInt32PtrTy(*DAG.getContext(), 257)); - SDValue TlsArray = - Subtarget->is64Bit() - ? DAG.getIntPtrConstant(0x58, dl) - : (Subtarget->isTargetWindowsGNU() - ? DAG.getIntPtrConstant(0x2C, dl) - : DAG.getExternalSymbol("_tls_array", getPointerTy())); + SDValue TlsArray = Subtarget->is64Bit() + ? DAG.getIntPtrConstant(0x58, dl) + : (Subtarget->isTargetWindowsGNU() + ? 
DAG.getIntPtrConstant(0x2C, dl) + : DAG.getExternalSymbol("_tls_array", PtrVT)); SDValue ThreadPointer = - DAG.getLoad(getPointerTy(), dl, Chain, TlsArray, - MachinePointerInfo(Ptr), false, false, false, 0); + DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr), false, + false, false, 0); SDValue res; if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) { res = ThreadPointer; } else { // Load the _tls_index variable - SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy()); + SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT); if (Subtarget->is64Bit()) - IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain, IDX, + IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX, MachinePointerInfo(), MVT::i32, false, false, false, 0); else - IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(), - false, false, false, 0); + IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo(), false, + false, false, 0); - SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()), dl, - getPointerTy()); - IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale); + auto &DL = DAG.getDataLayout(); + SDValue Scale = + DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, PtrVT); + IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale); - res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX); + res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX); } - res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(), - false, false, false, 0); + res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo(), false, false, + false, 0); // Get the offset of start of .tls section SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), X86II::MO_SECREL); - SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), TGA); + SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. - return DAG.getNode(ISD::ADD, dl, getPointerTy(), res, Offset); + return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset); } llvm_unreachable("TLS not implemented for this target."); @@ -11564,8 +11827,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); + auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, MachinePointerInfo::getFixedStack(SSFI), @@ -11614,7 +11878,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, MachineFunction &MF = DAG.getMachineFunction(); unsigned SSFISize = Op.getValueType().getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + auto PtrVT = getPointerTy(MF.getDataLayout()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); Tys = DAG.getVTList(MVT::Other); SDValue Ops[] = { Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag @@ -11656,7 +11921,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, // Build some magic constants. 
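// Illustrative sketch (not part of the diff): the address computation the
// Windows TLS lowering above emits for the non local-exec case. The TEB slot
// (0x58 on x64, 0x2C on 32-bit windows-gnu, the _tls_array symbol otherwise)
// holds ThreadLocalStoragePointer, _tls_index selects this module's TLS block,
// and the SECREL relocation supplies the variable's offset inside .tls.
static char *windowsTLSAddressSketch(char **ThreadLocalStoragePointer, // loaded from the TEB slot
                                     unsigned TlsIndex,                // loaded from _tls_index
                                     unsigned long long SecRelOffset) { // offset of the GV inside .tls
  char *ModuleTLSBase = ThreadLocalStoragePointer[TlsIndex];
  return ModuleTLSBase + SecRelOffset;
}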
static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; Constant *C0 = ConstantDataVector::get(*Context, CV0); - SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, 16); SmallVector<Constant*,2> CV1; CV1.push_back( @@ -11666,7 +11932,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble, APInt(64, 0x4530000000000000ULL)))); Constant *C1 = ConstantVector::get(CV1); - SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16); + SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16); // Load the 64-bit value into an XMM register. SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, @@ -11882,6 +12148,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); SDLoc dl(Op); + auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Op.getValueType().isVector()) return lowerUINT_TO_FP_vec(Op, DAG); @@ -11904,9 +12171,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Make a 64-bit buffer, and use it to build an FILD. SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64); if (SrcVT == MVT::i32) { - SDValue WordOff = DAG.getConstant(4, dl, getPointerTy()); - SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, - getPointerTy(), StackSlot, WordOff); + SDValue WordOff = DAG.getConstant(4, dl, PtrVT); + SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, WordOff); SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, MachinePointerInfo(), false, false, 0); @@ -11940,22 +12206,20 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, APInt FF(32, 0x5F800000ULL); // Check whether the sign bit is set. - SDValue SignSet = DAG.getSetCC(dl, - getSetCCResultType(*DAG.getContext(), MVT::i64), - Op.getOperand(0), - DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); + SDValue SignSet = DAG.getSetCC( + dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64), + Op.getOperand(0), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. SDValue FudgePtr = DAG.getConstantPool( - ConstantInt::get(*DAG.getContext(), FF.zext(64)), - getPointerTy()); + ConstantInt::get(*DAG.getContext(), FF.zext(64)), PtrVT); // Get a pointer to FF if the sign bit was set, or to 0 otherwise. SDValue Zero = DAG.getIntPtrConstant(0, dl); SDValue Four = DAG.getIntPtrConstant(4, dl); SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, Zero, Four); - FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset); + FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset); // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? 
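// Illustrative sketch (not part of the diff): the correction the i64 uint-to-fp
// path above applies with the 0x5F800000 "fudge" constant, which is the IEEE-754
// single-precision encoding of 2^64. Shown as the scalar analogue (the lowering
// itself performs the add on the x87 stack at f80 precision):
static double u64ToDoubleSketch(unsigned long long X) {
  double D = (double)(long long)X;     // convert as signed, FILD-style
  if ((long long)X < 0)                // the sign bit was set, so we are off by 2^64
    D += 18446744073709551616.0;       // + 2^64, selected via the fudge constant
  return D;
}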
@@ -11974,6 +12238,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, SDLoc DL(Op); EVT DstTy = Op.getValueType(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); if (!IsSigned && !isIntegerTypeFTOL(DstTy)) { assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); @@ -11998,7 +12263,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MachineFunction &MF = DAG.getMachineFunction(); unsigned MemSize = DstTy.getSizeInBits()/8; int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); unsigned Opc; if (!IsSigned && isIntegerTypeFTOL(DstTy)) @@ -12032,7 +12297,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); - StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + StackSlot = DAG.getFrameIndex(SSFI, PtrVT); } MachineMemOperand *MMO = @@ -12403,7 +12668,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { Constant *C = ConstantInt::get(*Context, MaskElt); C = ConstantVector::getSplat(NumElts, C); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy()); + SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), @@ -12462,7 +12727,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { CV[0] = ConstantFP::get(*Context, APFloat(Sem, APInt::getHighBitsSet(SizeInBits, 1))); Constant *C = ConstantVector::get(CV); - SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16); + auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + SDValue CPIdx = DAG.getConstantPool(C, PtrVT, 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, 16); @@ -12483,7 +12749,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { APFloat(Sem, APInt::getLowBitsSet(SizeInBits, SizeInBits - 1))); } C = ConstantVector::get(CV); - CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(), 16); + CPIdx = DAG.getConstantPool(C, PtrVT, 16); SDValue Val = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, false, false, 16); @@ -13352,8 +13618,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (hasMinMax) { switch (SetCCOpcode) { default: break; - case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break; - case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break; + case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break; + case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break; } if (MinMax) { Swap = false; Invert = false; FlipSigns = false; } @@ -14172,8 +14438,8 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, SmallVector<SDValue, 8> Chains; SDValue Ptr = Ld->getBasePtr(); - SDValue Increment = - DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl, TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(SclrLoadTy.getSizeInBits() / 8, dl, + TLI.getPointerTy(DAG.getDataLayout())); SDValue Res = DAG.getUNDEF(LoadUnitVecVT); for (unsigned i = 0; i < NumLoads; ++i) { @@ -14613,7 +14879,7 @@ 
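// Illustrative sketch (not part of the diff): the bitwise identity that the
// constant-pool masks built in LowerFCOPYSIGN above implement, shown here for a
// scalar double.
#include <cstring>
static double copysignSketch(double Mag, double Sgn) {
  unsigned long long M, S;
  std::memcpy(&M, &Mag, 8);
  std::memcpy(&S, &Sgn, 8);
  unsigned long long SignBit = 1ULL << 63;
  unsigned long long R = (M & ~SignBit) | (S & SignBit); // keep magnitude, copy sign
  double Res;
  std::memcpy(&Res, &R, 8);
  return Res;
}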
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, EVT VT = Op.getNode()->getValueType(0); bool Is64Bit = Subtarget->is64Bit(); - EVT SPTy = getPointerTy(); + MVT SPTy = getPointerTy(DAG.getDataLayout()); if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -14630,8 +14896,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, "have nested arguments."); } - const TargetRegisterClass *AddrRegClass = - getRegClassFor(getPointerTy()); + const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, @@ -14666,6 +14931,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); + auto PtrVT = getPointerTy(MF.getDataLayout()); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); @@ -14674,8 +14940,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), - getPointerTy()); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV), false, false, 0); } @@ -14695,8 +14960,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MemOps.push_back(Store); // Store fp_offset - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(4, DL)); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); Store = DAG.getStore(Op.getOperand(0), DL, DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), @@ -14704,20 +14968,16 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MemOps.push_back(Store); // Store ptr to overflow_arg_area - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(4, DL)); - SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), - getPointerTy()); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); + SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8), false, false, 0); MemOps.push_back(Store); // Store ptr to reg_save_area. - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(8, DL)); - SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), - getPointerTy()); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(8, DL)); + SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT); Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo(SV, 16), false, false, 0); MemOps.push_back(Store); @@ -14739,7 +14999,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { EVT ArgVT = Op.getNode()->getValueType(0); Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - uint32_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy); + uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); uint8_t ArgMode; // Decide which area this value should be read from. 
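// Illustrative sketch (not part of the diff): the System V x86-64 va_list record
// that the four stores in LowerVASTART above populate; the byte offsets match
// the ADDs of 4, 4 and 8 in the code.
struct VAListSketch {
  unsigned gp_offset;        // offset 0:  FuncInfo->getVarArgsGPOffset()
  unsigned fp_offset;        // offset 4:  FuncInfo->getVarArgsFPOffset()
  void *overflow_arg_area;   // offset 8:  frame index of the stack argument area
  void *reg_save_area;       // offset 16: frame index of the register save area
};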
@@ -14768,7 +15028,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDValue InstOps[] = {Chain, SrcPtr, DAG.getConstant(ArgSize, dl, MVT::i32), DAG.getConstant(ArgMode, dl, MVT::i8), DAG.getConstant(Align, dl, MVT::i32)}; - SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other); + SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other); SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl, VTs, InstOps, MVT::i64, MachinePointerInfo(SV), @@ -14995,6 +15255,20 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc); } +static int getSEHRegistrationNodeSize(const Function *Fn) { + if (!Fn->hasPersonalityFn()) + report_fatal_error( + "querying registration node size for function without personality"); + // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See + // WinEHStatePass for the full struct definition. + switch (classifyEHPersonality(Fn->getPersonalityFn())) { + case EHPersonality::MSVC_X86SEH: return 24; + case EHPersonality::MSVC_CXX: return 16; + default: break; + } + report_fatal_error("can only recover FP for MSVC EH personality functions"); +} + /// When the 32-bit MSVC runtime transfers control to us, either to an outlined /// function or when returning to a parent frame after catching an exception, we /// recover the parent frame pointer by doing arithmetic on the incoming EBP. @@ -15009,7 +15283,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, SDLoc dl; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT PtrVT = TLI.getPointerTy(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); // It's possible that the parent function no longer has a personality function // if the exceptional code was optimized away, in which case we just return @@ -15017,15 +15291,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, if (!Fn->hasPersonalityFn()) return EntryEBP; - // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See - // WinEHStatePass for the full struct definition. - int RegNodeSize; - switch (classifyEHPersonality(Fn->getPersonalityFn())) { - default: - report_fatal_error("can only recover FP for MSVC EH personality functions"); - case EHPersonality::MSVC_X86SEH: RegNodeSize = 24; break; - case EHPersonality::MSVC_CXX: RegNodeSize = 16; break; - } + int RegNodeSize = getSEHRegistrationNodeSize(Fn); // Get an MCSymbol that will ultimately resolve to the frame offset of the EH // registration. 
@@ -15034,7 +15300,7 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, GlobalValue::getRealLinkageName(Fn->getName())); SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT); SDValue RegNodeFrameOffset = - DAG.getNode(ISD::FRAME_ALLOC_RECOVER, dl, PtrVT, OffsetSymVal); + DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal); // RegNodeBase = EntryEBP - RegNodeSize // ParentFP = RegNodeBase - RegNodeFrameOffset @@ -15059,6 +15325,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget case INTR_TYPE_3OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case INTR_TYPE_4OP: + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(4)); case INTR_TYPE_1OP_MASK_RM: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); @@ -15143,7 +15412,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Rnd; if (Op.getNumOperands() == 6) Rnd = Op.getOperand(5); - else + else Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Rnd), @@ -15173,7 +15442,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } - case VPERM_3OP_MASKZ: + case VPERM_3OP_MASKZ: case VPERM_3OP_MASK: case FMA_OP_MASK3: case FMA_OP_MASKZ: @@ -15499,6 +15768,19 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget "llvm.x86.seh.recoverfp must take a function as the first argument"); return recoverFramePointer(DAG, Fn, IncomingFPOp); } + + case Intrinsic::localaddress: { + // Returns one of the stack, base, or frame pointer registers, depending on + // which is used to reference local variables. + MachineFunction &MF = DAG.getMachineFunction(); + const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + unsigned Reg; + if (RegInfo->hasBasePointer(MF)) + Reg = RegInfo->getBaseRegister(); + else // This function handles the SP or FP case. + Reg = RegInfo->getPtrSizedFrameRegister(MF); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); + } } } @@ -15712,34 +15994,60 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); + const Function *Fn = MF.getFunction(); SDLoc dl(Op); SDValue Chain = Op.getOperand(0); + assert(Subtarget->getFrameLowering()->hasFP(MF) && + "using llvm.x86.seh.restoreframe requires a frame pointer"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT VT = TLI.getPointerTy(); + MVT VT = TLI.getPointerTy(DAG.getDataLayout()); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); unsigned SPReg = RegInfo->getStackRegister(); + unsigned SlotSize = RegInfo->getSlotSize(); // Get incoming EBP. SDValue IncomingEBP = DAG.getCopyFromReg(Chain, dl, FrameReg, VT); - // Load [EBP-24] into SP. - SDValue SPAddr = - DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, DAG.getConstant(-24, dl, VT)); + // SP is saved in the first field of every registration node, so load + // [EBP-RegNodeSize] into SP. 
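// Illustrative sketch (not part of the diff): the pointer arithmetic used by
// recoverFramePointer and LowerSEHRESTOREFRAME above, with RegNodeSize as
// returned by getSEHRegistrationNodeSize (24 bytes for the x86 SEH personality,
// 16 for the MSVC C++ personality).
static char *recoverParentFPSketch(char *EntryEBP, int RegNodeSize,
                                   int RegNodeFrameOffset) {
  char *RegNodeBase = EntryEBP - RegNodeSize;   // the EH registration node sits below EBP
  return RegNodeBase - RegNodeFrameOffset;      // ParentFP, per the comment above
}
static char *restoredSPSketch(char *EntryEBP, int RegNodeSize) {
  // The saved stack pointer is the first field of the registration node.
  return *(char **)(EntryEBP - RegNodeSize);
}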
+ int RegNodeSize = getSEHRegistrationNodeSize(Fn); + SDValue SPAddr = DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, + DAG.getConstant(-RegNodeSize, dl, VT)); SDValue NewSP = DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits() / 8); Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP); - // FIXME: Restore the base pointer in case of stack realignment! + if (!RegInfo->needsStackRealignment(MF)) { + // Adjust EBP to point back to the original frame position. + SDValue NewFP = recoverFramePointer(DAG, Fn, IncomingEBP); + Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP); + } else { + assert(RegInfo->hasBasePointer(MF) && + "functions with Win32 EH must use frame or base pointer register"); + + // Reload the base pointer (ESI) with the adjusted incoming EBP. + SDValue NewBP = recoverFramePointer(DAG, Fn, IncomingEBP); + Chain = DAG.getCopyToReg(Chain, dl, RegInfo->getBaseRegister(), NewBP); + + // Reload the spilled EBP value, now that the stack and base pointers are + // set up. + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + X86FI->setHasSEHFramePtrSave(true); + int FI = MF.getFrameInfo()->CreateSpillStackObject(SlotSize, SlotSize); + X86FI->setSEHFramePtrSaveIndex(FI); + SDValue NewFP = DAG.getLoad(VT, dl, Chain, DAG.getFrameIndex(FI, VT), + MachinePointerInfo(), false, false, false, + VT.getScalarSizeInBits() / 8); + Chain = DAG.getCopyToReg(NewFP, dl, FrameReg, NewFP); + } - // Adjust EBP to point back to the original frame position. - SDValue NewFP = recoverFramePointer(DAG, MF.getFunction(), IncomingEBP); - Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP); return Chain; } @@ -15910,7 +16218,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); @@ -15969,14 +16277,36 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
-unsigned X86TargetLowering::getRegisterByName(const char* RegName, - EVT VT) const { +unsigned X86TargetLowering::getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const { + const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); + const MachineFunction &MF = DAG.getMachineFunction(); + unsigned Reg = StringSwitch<unsigned>(RegName) .Case("esp", X86::ESP) .Case("rsp", X86::RSP) + .Case("ebp", X86::EBP) + .Case("rbp", X86::RBP) .Default(0); + + if (Reg == X86::EBP || Reg == X86::RBP) { + if (!TFI.hasFP(MF)) + report_fatal_error("register " + StringRef(RegName) + + " is allocatable: function has no frame pointer"); +#ifndef NDEBUG + else { + const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); + unsigned FrameReg = + RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); + assert((FrameReg == X86::EBP || FrameReg == X86::RBP) && + "Invalid Frame Register!"); + } +#endif + } + if (Reg) return Reg; + report_fatal_error("Invalid register name global variable"); } @@ -15992,7 +16322,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Handler = Op.getOperand(2); SDLoc dl (Op); - EVT PtrVT = getPointerTy(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || @@ -16211,7 +16541,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, // Save FP Control Word to stack slot int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); + SDValue StackSlot = + DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout())); MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), @@ -16572,7 +16903,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons } SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), - getPointerTy()); + getPointerTy(DAG.getDataLayout())); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) @@ -16642,9 +16973,9 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, // If we have a signed multiply but no PMULDQ fix up the high parts of a // unsigned multiply. if (IsSigned && !Subtarget->hasSSE41()) { - SDValue ShAmt = - DAG.getConstant(31, dl, - DAG.getTargetLoweringInfo().getShiftAmountTy(VT)); + SDValue ShAmt = DAG.getConstant( + 31, dl, + DAG.getTargetLoweringInfo().getShiftAmountTy(VT, DAG.getDataLayout())); SDValue T1 = DAG.getNode(ISD::AND, dl, VT, DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1); SDValue T2 = DAG.getNode(ISD::AND, dl, VT, @@ -16717,6 +17048,38 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, unsigned X86Opc = (Op.getOpcode() == ISD::SHL) ? X86ISD::VSHLI : (Op.getOpcode() == ISD::SRL) ? X86ISD::VSRLI : X86ISD::VSRAI; + auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) { + assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type"); + MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); + SDValue Ex = DAG.getBitcast(ExVT, R); + + if (ShiftAmt >= 32) { + // Splat sign to upper i32 dst, and SRA upper i32 src to lower i32. 
+ SDValue Upper = + getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG); + SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, + ShiftAmt - 32, DAG); + if (VT == MVT::v2i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3}); + if (VT == MVT::v4i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, + {9, 1, 11, 3, 13, 5, 15, 7}); + } else { + // SRA upper i32, SHL whole i64 and select lower i32. + SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, + ShiftAmt, DAG); + SDValue Lower = + getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG); + Lower = DAG.getBitcast(ExVT, Lower); + if (VT == MVT::v2i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3}); + if (VT == MVT::v4i64) + Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, + {8, 1, 10, 3, 12, 5, 14, 7}); + } + return DAG.getBitcast(VT, Ex); + }; + // Optimize shl/srl/sra with constant shift amount. if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) { if (auto *ShiftConst = BVAmt->getConstantSplatNode()) { @@ -16725,6 +17088,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + // i64 SRA needs to be performed as partial shifts. + if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Op.getOpcode() == ISD::SRA) + return ArithmeticShiftRight64(ShiftAmt); + if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) { unsigned NumElts = VT.getVectorNumElements(); MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); @@ -16808,7 +17176,12 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, if (ShAmt != ShiftAmt) return SDValue(); } - return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + + if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) + return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); + + if (Op.getOpcode() == ISD::SRA) + return ArithmeticShiftRight64(ShiftAmt); } return SDValue(); @@ -16890,7 +17263,9 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, if (Vals[j] != Amt.getOperand(i + j)) return SDValue(); } - return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1)); + + if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) + return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1)); } return SDValue(); } @@ -17042,6 +17417,53 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, } } + // v4i32 Non Uniform Shifts. + // If the shift amount is constant we can shift each lane using the SSE2 + // immediate shifts, else we need to zero-extend each lane to the lower i64 + // and shift using the SSE2 variable shifts. + // The separate results can then be blended together. + if (VT == MVT::v4i32) { + unsigned Opc = Op.getOpcode(); + SDValue Amt0, Amt1, Amt2, Amt3; + if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { + Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0}); + Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1}); + Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2}); + Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3}); + } else { + // ISD::SHL is handled above but we include it here for completeness. 
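// Illustrative sketch (not part of the diff): the split that
// ArithmeticShiftRight64 above performs, restated on a scalar. A 64-bit
// arithmetic shift is rebuilt from an arithmetic shift of the high 32 bits plus
// either a second 32-bit arithmetic shift (amounts >= 32) or a 64-bit logical
// shift (amounts < 32). Amt is assumed to be in [1, 63].
static long long sra64Sketch(long long V, unsigned Amt) {
  int Hi = (int)((unsigned long long)V >> 32);
  unsigned NewHi, NewLo;
  if (Amt >= 32) {
    NewHi = (unsigned)(Hi >> 31);          // splat of the sign bit (VSRAI by 31)
    NewLo = (unsigned)(Hi >> (Amt - 32));  // VSRAI of the old high half
  } else {
    NewHi = (unsigned)(Hi >> Amt);                     // VSRAI on the high lane
    NewLo = (unsigned)((unsigned long long)V >> Amt);  // low lane of VSRLI on the whole i64
  }
  return (long long)(((unsigned long long)NewHi << 32) | NewLo);
}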
+ switch (Opc) { + default: + llvm_unreachable("Unknown target vector shift node"); + case ISD::SHL: + Opc = X86ISD::VSHL; + break; + case ISD::SRL: + Opc = X86ISD::VSRL; + break; + case ISD::SRA: + Opc = X86ISD::VSRA; + break; + } + // The SSE2 shifts use the lower i64 as the same shift amount for + // all lanes and the upper i64 is ignored. These shuffle masks + // optimally zero-extend each lanes on SSE2/SSE41/AVX targets. + SDValue Z = getZeroVector(VT, Subtarget, DAG, dl); + Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1}); + Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1}); + Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1}); + Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1}); + } + + SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0); + SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1); + SDValue R2 = DAG.getNode(Opc, dl, VT, R, Amt2); + SDValue R3 = DAG.getNode(Opc, dl, VT, R, Amt3); + SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1}); + SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7}); + return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7}); + } + if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) { MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); unsigned ShiftOpcode = Op->getOpcode(); @@ -17944,7 +18366,8 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget, // the results are returned via SRet in memory. const char *LibcallName = isF64 ? "__sincos_stret" : "__sincosf_stret"; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue Callee = DAG.getExternalSymbol(LibcallName, TLI.getPointerTy()); + SDValue Callee = + DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout())); Type *RetTy = isF64 ? 
(Type*)StructType::get(ArgTy, ArgTy, nullptr) @@ -18443,10 +18866,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; - case X86ISD::UMAX: return "X86ISD::UMAX"; - case X86ISD::UMIN: return "X86ISD::UMIN"; - case X86ISD::SMAX: return "X86ISD::SMAX"; - case X86ISD::SMIN: return "X86ISD::SMIN"; case X86ISD::ABS: return "X86ISD::ABS"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; @@ -18456,6 +18875,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FMINC: return "X86ISD::FMINC"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; + case X86ISD::EXTRQI: return "X86ISD::EXTRQI"; + case X86ISD::INSERTQI: return "X86ISD::INSERTQI"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; @@ -18478,6 +18899,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VFPROUND: return "X86ISD::VFPROUND"; case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD"; + case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD"; case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ"; case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ"; case X86ISD::VSHL: return "X86ISD::VSHL"; @@ -18594,16 +19016,19 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ADDS: return "X86ISD::ADDS"; case X86ISD::SUBS: return "X86ISD::SUBS"; case X86ISD::AVG: return "X86ISD::AVG"; + case X86ISD::MULHRS: return "X86ISD::MULHRS"; case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND"; + case X86ISD::FP_TO_SINT_RND: return "X86ISD::FP_TO_SINT_RND"; + case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND"; } return nullptr; } // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. -bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, +bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // X86 supports extremely general addressing modes. 
CodeModel::Model M = getTargetMachine().getCodeModel(); @@ -19555,7 +19980,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *AddrRegClass = - getRegClassFor(getPointerTy()); + getRegClassFor(getPointerTy(MF->getDataLayout())); unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass), bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass), @@ -19750,7 +20175,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MemOpndSlot = CurOp; - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); @@ -19882,7 +20307,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - MVT PVT = getPointerTy(); + MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); @@ -21377,7 +21802,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, // alignment is valid. unsigned Align = LN0->getAlignment(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( EltVT.getTypeForEVT(*DAG.getContext())); if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, EltVT)) @@ -21513,14 +21938,15 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, if (TLI.isOperationLegal(ISD::SRA, MVT::i64)) { SDValue Cst = DAG.getBitcast(MVT::v2i64, InputVector); - EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(); + auto &DL = DAG.getDataLayout(); + EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy(DL); SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, DAG.getConstant(0, dl, VecIdxTy)); SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Cst, DAG.getConstant(1, dl, VecIdxTy)); - SDValue ShAmt = DAG.getConstant(32, dl, - DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64)); + SDValue ShAmt = DAG.getConstant( + 32, dl, DAG.getTargetLoweringInfo().getShiftAmountTy(MVT::i64, DL)); Vals[0] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf); Vals[1] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, DAG.getNode(ISD::SRA, dl, MVT::i64, BottomHalf, ShAmt)); @@ -21539,10 +21965,11 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, // Replace each use (extract) with a load of the appropriate element. for (unsigned i = 0; i < 4; ++i) { uint64_t Offset = EltSize * i; - SDValue OffsetVal = DAG.getConstant(Offset, dl, TLI.getPointerTy()); + auto PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + SDValue OffsetVal = DAG.getConstant(Offset, dl, PtrVT); - SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), - StackPtr, OffsetVal); + SDValue ScalarAddr = + DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, OffsetVal); // Load the scalar. Vals[i] = DAG.getLoad(ElementType, dl, Ch, @@ -21622,16 +22049,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, default: break; case ISD::SETULT: case ISD::SETULE: - Opc = hasUnsigned ? X86ISD::UMIN : 0u; break; + Opc = hasUnsigned ? ISD::UMIN : 0; break; case ISD::SETUGT: case ISD::SETUGE: - Opc = hasUnsigned ? X86ISD::UMAX : 0u; break; + Opc = hasUnsigned ? ISD::UMAX : 0; break; case ISD::SETLT: case ISD::SETLE: - Opc = hasSigned ? 
X86ISD::SMIN : 0u; break; + Opc = hasSigned ? ISD::SMIN : 0; break; case ISD::SETGT: case ISD::SETGE: - Opc = hasSigned ? X86ISD::SMAX : 0u; break; + Opc = hasSigned ? ISD::SMAX : 0; break; } // Check for x CC y ? y : x -- a min/max with reversed arms. } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && @@ -21640,16 +22067,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, default: break; case ISD::SETULT: case ISD::SETULE: - Opc = hasUnsigned ? X86ISD::UMAX : 0u; break; + Opc = hasUnsigned ? ISD::UMAX : 0; break; case ISD::SETUGT: case ISD::SETUGE: - Opc = hasUnsigned ? X86ISD::UMIN : 0u; break; + Opc = hasUnsigned ? ISD::UMIN : 0; break; case ISD::SETLT: case ISD::SETLE: - Opc = hasSigned ? X86ISD::SMAX : 0u; break; + Opc = hasSigned ? ISD::SMAX : 0; break; case ISD::SETGT: case ISD::SETGE: - Opc = hasSigned ? X86ISD::SMIN : 0u; break; + Opc = hasSigned ? ISD::SMIN : 0; break; } } @@ -22106,7 +22533,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // Check if the selector will be produced by CMPP*/PCMP* Cond.getOpcode() == ISD::SETCC && // Check if SETCC has already been promoted - TLI.getSetCCResultType(*DAG.getContext(), VT) == CondVT) { + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) == + CondVT) { bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode()); bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode()); @@ -22826,7 +23254,7 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { // We shift all of the values by one. In many cases we do not have // hardware support for this operation. This is better expressed as an ADD // of two values. - if (N1SplatC->getZExtValue() == 1) + if (N1SplatC->getAPIntValue() == 1) return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); } @@ -23478,7 +23906,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue Ptr = Ld->getBasePtr(); - SDValue Increment = DAG.getConstant(16, dl, TLI.getPointerTy()); + SDValue Increment = + DAG.getConstant(16, dl, TLI.getPointerTy(DAG.getDataLayout())); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), NumElems/2); @@ -23687,7 +24116,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Value0 = Extract128BitVector(StoredVal, 0, DAG, dl); SDValue Value1 = Extract128BitVector(StoredVal, NumElems/2, DAG, dl); - SDValue Stride = DAG.getConstant(16, dl, TLI.getPointerTy()); + SDValue Stride = + DAG.getConstant(16, dl, TLI.getPointerTy(DAG.getDataLayout())); SDValue Ptr0 = St->getBasePtr(); SDValue Ptr1 = DAG.getNode(ISD::ADD, dl, Ptr0.getValueType(), Ptr0, Stride); @@ -23760,8 +24190,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); SDValue ShuffWide = DAG.getBitcast(StoreVecVT, Shuff); SmallVector<SDValue, 8> Chains; - SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, dl, - TLI.getPointerTy()); + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, dl, + TLI.getPointerTy(DAG.getDataLayout())); SDValue Ptr = St->getBasePtr(); // Perform one or more big stores into memory. 
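Context for the PerformSTORECombine hunk above: the wide store is split into two halves written at Ptr and Ptr + 16, and the 16-byte stride is now built with TLI.getPointerTy(DAG.getDataLayout()). A minimal scalar-memory model of the same splitting, purely illustrative (the helper name is made up):

    #include <cstdint>
    #include <cstring>

    // Split one 256-bit store into two 128-bit stores, the second at an offset
    // of 16 bytes, mirroring the Stride/Increment constants in the DAG combine.
    static void storeYmmAsTwoXmm(uint8_t *Dst, const uint8_t (&Src)[32]) {
      std::memcpy(Dst,      Src,      16); // low 128 bits
      std::memcpy(Dst + 16, Src + 16, 16); // high 128 bits, pointer advanced by 16
    }

    int main() {
      uint8_t Src[32], Dst[32];
      for (int i = 0; i < 32; ++i) Src[i] = static_cast<uint8_t>(i);
      storeYmmAsTwoXmm(Dst, Src);
      return std::memcmp(Src, Dst, 32) == 0 ? 0 : 1;
    }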
@@ -24659,6 +25089,31 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, return SDValue(); } +static SDValue PerformUINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + SDValue Op0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT InVT = Op0.getValueType(); + EVT InSVT = InVT.getScalarType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32)) + // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32)) + if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) { + SDLoc dl(N); + EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, + InVT.getVectorNumElements()); + SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); + + if (TLI.isOperationLegal(ISD::UINT_TO_FP, DstVT)) + return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P); + + return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); + } + + return SDValue(); +} + static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { // First try to optimize away the conversion entirely when it's @@ -24913,6 +25368,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case ISD::MSTORE: return PerformMSTORECombine(N, DAG, Subtarget); case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, Subtarget); + case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG, Subtarget); case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget); case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget); case X86ISD::FXOR: @@ -25135,7 +25591,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) || matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) { AsmPieces.clear(); - const std::string &ConstraintsStr = IA->getConstraintString(); + StringRef ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (clobbersFlagRegisters(AsmPieces)) @@ -25149,7 +25605,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) && matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) { AsmPieces.clear(); - const std::string &ConstraintsStr = IA->getConstraintString(); + StringRef ConstraintsStr = IA->getConstraintString(); SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); array_pod_sort(AsmPieces.begin(), AsmPieces.end()); if (clobbersFlagRegisters(AsmPieces)) @@ -25176,7 +25632,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. X86TargetLowering::ConstraintType -X86TargetLowering::getConstraintType(const std::string &Constraint) const { +X86TargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'R': @@ -25508,7 +25964,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair<unsigned, const TargetRegisterClass *> X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { // First, see if this is a constraint that directly corresponds to an LLVM // register class. 
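The new PerformUINT_TO_FPCombine above rewrites UINT_TO_FP of vXi8/vXi16 as SINT_TO_FP of the zero-extended vXi32 value. The reason this is sound: after zero-extension to 32 bits the element is guaranteed non-negative, so signed and unsigned conversion agree. A small self-contained check (scalar model only, not LLVM code):

    #include <cstdint>
    #include <cassert>

    // Zero-extend to i32, then convert as signed: equals the unsigned conversion
    // of the original element because the widened value is never negative.
    static float uintToFloatViaSigned(uint16_t X) {
      int32_t Widened = static_cast<int32_t>(static_cast<uint32_t>(X)); // ZEXT
      return static_cast<float>(Widened);                               // SINT_TO_FP
    }

    int main() {
      for (uint32_t X = 0; X <= 0xFFFF; ++X) {
        float Direct = static_cast<float>(static_cast<uint16_t>(X)); // UINT_TO_FP
        assert(Direct == uintToFloatViaSigned(static_cast<uint16_t>(X)));
      }
      return 0;
    }

The diff still prefers a real UINT_TO_FP when the target makes it legal for the widened type; the SINT_TO_FP form is the fallback.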
@@ -25717,8 +26173,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return Res; } -int X86TargetLowering::getScalingFactorCost(const AddrMode &AM, - Type *Ty, +int X86TargetLowering::getScalingFactorCost(const DataLayout &DL, + const AddrMode &AM, Type *Ty, unsigned AS) const { // Scaling factors are not free at all. // An indexed folded instruction, i.e., inst (reg1, reg2, scale), @@ -25738,7 +26194,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM, // E.g., on Haswell: // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3. // vmovaps %ymm1, (%r8) can use port 2, 3, or 7. - if (isLegalAddressingMode(AM, Ty, AS)) + if (isLegalAddressingMode(DL, AM, Ty, AS)) // Scale represents reg2 * scale, thus account for 1 // as soon as we use a second register. return AM.Scale != 0; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 17660891635c..723d5304495c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -233,12 +233,6 @@ namespace llvm { /// Floating point horizontal sub. FHSUB, - /// Unsigned integer max and min. - UMAX, UMIN, - - /// Signed integer max and min. - SMAX, SMIN, - // Integer absolute value ABS, @@ -298,8 +292,8 @@ namespace llvm { // Vector FP round. VFPROUND, - // Vector signed integer to double. - CVTDQ2PD, + // Vector signed/unsigned integer to double. + CVTDQ2PD, CVTUDQ2PD, // 128-bit vector logical left / right shift VSHLDQ, VSRLDQ, @@ -400,10 +394,15 @@ namespace llvm { VINSERT, VEXTRACT, + /// SSE4A Extraction and Insertion. + EXTRQI, INSERTQI, + // Vector multiply packed unsigned doubleword integers PMULUDQ, // Vector multiply packed signed doubleword integers PMULDQ, + // Vector Multiply Packed UnsignedIntegers with Round and Scale + MULHRS, // FMA nodes FMADD, @@ -429,6 +428,9 @@ namespace llvm { //with rounding mode SINT_TO_FP_RND, UINT_TO_FP_RND, + + // Vector float/double to signed/unsigned integer. + FP_TO_SINT_RND, FP_TO_UINT_RND, // Save xmm argument registers to the stack, according to %al. An operator // is needed so that this can be expanded with control flow. VASTART_SAVE_XMM_REGS, @@ -599,7 +601,9 @@ namespace llvm { unsigned getJumpTableEncoding() const override; bool useSoftFloat() const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; } + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i8; + } const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, @@ -617,7 +621,8 @@ namespace llvm { /// function arguments in the caller parameter area. For X86, aggregates /// that contains are placed at 16-byte boundaries while the rest are at /// 4-byte boundaries. - unsigned getByValTypeAlignment(Type *Ty) const override; + unsigned getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const override; /// Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove @@ -685,7 +690,8 @@ namespace llvm { bool isCheapToSpeculateCtlz() const override; /// Return the value type to use for ISD::SETCC. - EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; /// Determine which of the bits specified in Mask are known to be either /// zero or one and return them in the KnownZero/KnownOne bitsets. 
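Among the node additions above is MULHRS, backing vpmulhrsw. For reference, a scalar model of one lane following the documented pmulhrsw behaviour (widen to 32 bits, multiply, shift right by 14, add 1 to round, drop the rounding bit); this is an illustration, not the lowering code:

    #include <cstdint>

    // One lane of pmulhrsw; the instruction applies this to every 16-bit element.
    static int16_t mulhrs(int16_t A, int16_t B) {
      int32_t Product = static_cast<int32_t>(A) * static_cast<int32_t>(B);
      return static_cast<int16_t>(((Product >> 14) + 1) >> 1);
    }

    int main() {
      // 0.5 * 0.5 in Q15 fixed point rounds to 0.25.
      return mulhrs(16384, 16384) == 8192 ? 0 : 1;
    }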
@@ -707,8 +713,7 @@ namespace llvm { bool ExpandInlineAsm(CallInst *CI) const override; - ConstraintType - getConstraintType(const std::string &Constraint) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. @@ -726,8 +731,8 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const override; - unsigned getInlineAsmMemConstraint( - const std::string &ConstraintCode) const override { + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "i") return InlineAsm::Constraint_i; else if (ConstraintCode == "o") @@ -745,13 +750,12 @@ namespace llvm { /// error, this returns a register number of 0. std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; /// Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; /// Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can @@ -770,7 +774,7 @@ namespace llvm { /// of the specified type. /// If the AM is supported, the return value must be >= 0. /// If the AM is not supported, it returns a negative value. - int getScalingFactorCost(const AddrMode &AM, Type *Ty, + int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; bool isVectorShiftByScalarCheap(Type *Ty) const override; @@ -872,7 +876,8 @@ namespace llvm { return nullptr; // nothing to do, move along. } - unsigned getRegisterByName(const char* RegName, EVT VT) const override; + unsigned getRegisterByName(const char* RegName, EVT VT, + SelectionDAG &DAG) const override; /// This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
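The header changes above move the inline-asm constraint hooks from const std::string& to StringRef, so callers can pass literals or slices of the constraint string without materialising a temporary std::string. A rough standalone sketch of the interface shape, using std::string_view as a stand-in for llvm::StringRef and a local enum as a stand-in for the InlineAsm constraint codes (only the "i" and "o" cases are taken from the diff; everything else here is illustrative):

    #include <string_view>

    enum class MemConstraint { Unknown, I, O };

    // Shape of a StringRef-style getInlineAsmMemConstraint.
    static MemConstraint classify(std::string_view Code) {
      if (Code == "i") return MemConstraint::I;
      if (Code == "o") return MemConstraint::O;
      return MemConstraint::Unknown;
    }

    int main() {
      return classify("o") == MemConstraint::O ? 0 : 1;
    }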
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index b309b8210851..faa91500b181 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -3136,6 +3136,12 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulh", mulhs, SSE_INTALU_ITINS_P, + HasBWI, 1>; +defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhu", mulhu, SSE_INTMUL_ITINS_P, + HasBWI, 1>; +defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrs", X86mulhrs, SSE_INTMUL_ITINS_P, + HasBWI, 1>, T8PD; defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, SSE_INTALU_ITINS_P, HasBWI, 1>; @@ -3230,32 +3236,32 @@ let Predicates = [HasBWI] in { defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W; } -defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax, +defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax, +defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax, +defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax, +defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax, +defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax, +defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin, +defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", smin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin, +defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", smin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin, +defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin, +defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", umin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin, +defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", umin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin, +defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; //===----------------------------------------------------------------------===// @@ -4035,7 +4041,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), (OpNode _.RC:$src1, - _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, + _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B; } } @@ -4394,16 +4400,16 @@ def : Pat<(f64 (sint_to_fp GR32:$src)), def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : 
avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR32, +defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32, v4f32x_info, i32mem, loadi32, "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64, +defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86SuintToFpRnd, GR32, v2f64x_info, +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info, i32mem, loadi32, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86SuintToFpRnd, GR64, +defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64, v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -4604,117 +4610,389 @@ def : Pat<(extloadf32 addr:$src), def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, Requires<[HasAVX512]>; -multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT, - Domain d> { -let hasSideEffects = 0 in { - def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, - (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX; - def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc), - !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), - [], d>, EVEX, EVEX_B, EVEX_RC; - let mayLoad = 1 in - def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, - (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX; -} // hasSideEffects = 0 +//===----------------------------------------------------------------------===// +// AVX-512 Vector convert from signed/unsigned integer to float/double +// and from float/double to signed/unsigned integer +//===----------------------------------------------------------------------===// + +multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNode, + string Broadcast = _.BroadcastStr, + string Alias = ""> { + + defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _Src.RC:$src), OpcodeStr, "$src", "$src", + (_.VT (OpNode (_Src.VT _Src.RC:$src)))>, EVEX; + + defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _Src.MemOp:$src), OpcodeStr#Alias, "$src", "$src", + (_.VT (OpNode (_Src.VT + (bitconvert (_Src.LdFrag addr:$src)))))>, EVEX; + + defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _Src.MemOp:$src), OpcodeStr, + "${src}"##Broadcast, "${src}"##Broadcast, + (_.VT (OpNode (_Src.VT + (X86VBroadcast (_Src.ScalarLdFrag addr:$src))) + ))>, EVEX, EVEX_B; +} +// Coversion with SAE - suppress all exceptions +multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _Src.RC:$src), OpcodeStr, + "{sae}, $src", "$src, {sae}", + (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), + (i32 FROUND_NO_EXC)))>, + EVEX, EVEX_B; } -multiclass avx512_vcvt_fp<bits<8> opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT, - Domain d> { 
-let hasSideEffects = 0 in { - def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, - (OpVT (OpNode (InVT SrcRC:$src))))], d>, EVEX; - let mayLoad = 1 in - def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, - (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))], d>, EVEX; -} // hasSideEffects = 0 +// Conversion with rounding control (RC) +multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr, + "$rc, $src", "$src, $rc", + (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>, + EVEX, EVEX_B, EVEX_RC; } -defm VCVTPD2PSZ : avx512_vcvt_fp_with_rc<0x5A, "vcvtpd2ps", VR512, VR256X, fround, - loadv8f64, f512mem, v8f32, v8f64, - SSEPackedSingle>, EVEX_V512, VEX_W, PD, - EVEX_CD8<64, CD8VF>; +// Extend Float to Double +multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, fextend>, + avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info, + X86vfpextRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info, + X86vfpext, "{1to2}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fextend>, + EVEX_V256; + } +} + +// Truncate Double to Float +multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fround>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info, + X86vfproundRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info, + X86vfpround, "{1to2}", "{x}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fround, + "{1to4}", "{y}">, EVEX_V256; + } +} + +defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">, + VEX_W, PD, EVEX_CD8<64, CD8VF>; +defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">, + PS, EVEX_CD8<32, CD8VH>; -defm VCVTPS2PDZ : avx512_vcvt_fp<0x5A, "vcvtps2pd", VR256X, VR512, fextend, - loadv4f64, f256mem, v8f64, v8f32, - SSEPackedDouble>, EVEX_V512, PS, - EVEX_CD8<32, CD8VH>; def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>; -def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), - (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))), - (VCVTPD2PSZrr VR512:$src)>; +let Predicates = [HasVLX] in { + def : Pat<(v4f64 (extloadv4f32 addr:$src)), + (VCVTPS2PDZ256rm addr:$src)>; +} -def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src), - (bc_v8f32(v8i32 immAllZerosV)), (i8 -1), imm:$rc)), - (VCVTPD2PSZrrb VR512:$src, imm:$rc)>; +// Convert Signed/Unsigned Doubleword to Double +multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode OpNode128> { + // No rounding in this op + let Predicates = [HasAVX512] in + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode>, + EVEX_V512; -//===----------------------------------------------------------------------===// -// AVX-512 Vector convert from sign integer to float/double -//===----------------------------------------------------------------------===// + let 
Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, + OpNode128, "{1to2}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode>, + EVEX_V256; + } +} -defm VCVTDQ2PSZ : avx512_vcvt_fp_with_rc<0x5B, "vcvtdq2ps", VR512, VR512, sint_to_fp, - loadv8i64, i512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, PS, - EVEX_CD8<32, CD8VF>; +// Convert Signed/Unsigned Doubleword to Float +multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info, + OpNodeRnd>, EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Doubleword with truncation +multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>, + avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Doubleword +multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Doubleword with truncation +multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>, + avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode, + "{1to2}", "{x}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, + "{1to4}", "{y}">, EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Doubleword +multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasAVX512] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. 
They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode, + "{1to2}", "{x}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, + "{1to4}", "{y}">, EVEX_V256; + } +} + +// Convert Double to Signed/Unsigned Quardword +multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>, + EVEX_V256; + } +} -defm VCVTDQ2PDZ : avx512_vcvt_fp<0xE6, "vcvtdq2pd", VR256X, VR512, sint_to_fp, - loadv4i64, i256mem, v8f64, v8i32, - SSEPackedDouble>, EVEX_V512, XS, +// Convert Double to Signed/Unsigned Quardword with truncation +multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>, + avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Signed/Unsigned Quardword to Double +multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode>, + EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Quardword +multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // Explicitly specified broadcast string, since we take only 2 elements + // from v4f32x_info source + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, + "{1to2}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Float to Signed/Unsigned Quardword with truncation +multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>, + avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // Explicitly specified broadcast string, since we take only 2 elements + // from v4f32x_info source + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, + "{1to2}">, EVEX_V128; + 
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>, + EVEX_V256; + } +} + +// Convert Signed/Unsigned Quardword to Float +multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, + SDNode OpNode, SDNode OpNodeRnd> { + let Predicates = [HasDQI] in { + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode>, + avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info, + OpNodeRnd>, EVEX_V512; + } + let Predicates = [HasDQI, HasVLX] in { + // we need "x"/"y" suffixes in order to distinguish between 128 and 256 + // memory forms of these instructions in Asm Parcer. They have the same + // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly + // due to the same reason. + defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode, + "{1to2}", "{x}">, EVEX_V128; + defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode, + "{1to4}", "{y}">, EVEX_V256; + } +} + +defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86cvtdq2pd>, XS, EVEX_CD8<32, CD8VH>; -defm VCVTTPS2DQZ : avx512_vcvt_fp<0x5B, "vcvttps2dq", VR512, VR512, fp_to_sint, - loadv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, XS, +defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, + X86VSintToFpRnd>, + PS, EVEX_CD8<32, CD8VF>; + +defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, + X86VFpToSintRnd>, + XS, EVEX_CD8<32, CD8VF>; + +defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, + X86VFpToSintRnd>, + PD, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, + X86VFpToUintRnd>, PS, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint, - loadv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, PD, VEX_W, +defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, + X86VFpToUintRnd>, PS, VEX_W, EVEX_CD8<64, CD8VF>; -defm VCVTTPS2UDQZ : avx512_vcvt_fp<0x78, "vcvttps2udq", VR512, VR512, fp_to_uint, - loadv16f32, f512mem, v16i32, v16f32, - SSEPackedSingle>, EVEX_V512, PS, +defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>, + XS, EVEX_CD8<32, CD8VH>; + +defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, + X86VUintToFpRnd>, XD, EVEX_CD8<32, CD8VF>; -// cvttps2udq (src, 0, mask-all-ones, sae-current) -def : Pat<(v16i32 (int_x86_avx512_mask_cvttps2udq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), FROUND_CURRENT)), - (VCVTTPS2UDQZrr VR512:$src)>; +defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtps2Int, + X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VF>; -defm VCVTTPD2UDQZ : avx512_vcvt_fp<0x78, "vcvttpd2udq", VR512, VR256X, fp_to_uint, - loadv8f64, f512mem, v8i32, v8f64, - SSEPackedDouble>, EVEX_V512, PS, VEX_W, +defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtpd2Int, + X86cvtpd2IntRnd>, XD, VEX_W, EVEX_CD8<64, CD8VF>; -// cvttpd2udq (src, 0, mask-all-ones, sae-current) -def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)), - (VCVTTPD2UDQZrr VR512:$src)>; +defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtps2UInt, + X86cvtps2UIntRnd>, + PS, EVEX_CD8<32, CD8VF>; +defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtpd2UInt, + X86cvtpd2UIntRnd>, VEX_W, + PS, EVEX_CD8<64, CD8VF>; -defm VCVTUDQ2PDZ : avx512_vcvt_fp<0x7A, "vcvtudq2pd", VR256X, VR512, uint_to_fp, - loadv4i64, f256mem, v8f64, v8i32, - 
SSEPackedDouble>, EVEX_V512, XS, - EVEX_CD8<32, CD8VH>; +defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtpd2Int, + X86cvtpd2IntRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; -defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp, - loadv16i32, f512mem, v16f32, v16i32, - SSEPackedSingle>, EVEX_V512, XD, - EVEX_CD8<32, CD8VF>; +defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtps2Int, + X86cvtps2IntRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtpd2UInt, + X86cvtpd2UIntRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtps2UInt, + X86cvtps2UIntRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, + X86VFpToSlongRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, + X86VFpToSlongRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, + X86VFpToUlongRnd>, VEX_W, + PD, EVEX_CD8<64, CD8VF>; + +defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, + X86VFpToUlongRnd>, PD, EVEX_CD8<32, CD8VH>; + +defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, + X86VSlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>; +defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, + X86VUlongToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>; + +defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, + X86VSlongToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>; + +defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, + X86VUlongToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>; + +let Predicates = [NoVLX] in { def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; @@ -4734,67 +5012,8 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>; - -def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), - (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>; -def : Pat<(v8f64 (int_x86_avx512_mask_cvtdq2pd_512 (v8i32 VR256X:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VCVTDQ2PDZrr VR256X:$src)>; -def : Pat<(v16f32 (int_x86_avx512_mask_cvtudq2ps_512 (v16i32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), - (VCVTUDQ2PSZrrb VR512:$src, imm:$rc)>; -def : Pat<(v8f64 (int_x86_avx512_mask_cvtudq2pd_512 (v8i32 VR256X:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VCVTUDQ2PDZrr VR256X:$src)>; - -multiclass avx512_vcvt_fp2int<bits<8> opc, string asm, RegisterClass SrcRC, - RegisterClass DstRC, PatFrag mem_frag, - X86MemOperand x86memop, Domain d> { -let hasSideEffects = 0 in { - def rr : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [], d>, EVEX; - def rrb : AVX512PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc), - !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), - [], d>, EVEX, EVEX_B, EVEX_RC; - let mayLoad = 1 in - def rm : AVX512PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [], d>, EVEX; -} // hasSideEffects = 0 } -defm VCVTPS2DQZ : avx512_vcvt_fp2int<0x5B, "vcvtps2dq", VR512, VR512, - loadv16f32, f512mem, 
SSEPackedSingle>, PD, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VCVTPD2DQZ : avx512_vcvt_fp2int<0xE6, "vcvtpd2dq", VR512, VR256X, - loadv8f64, f512mem, SSEPackedDouble>, XD, VEX_W, - EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2dq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), imm:$rc)), - (VCVTPS2DQZrrb VR512:$src, imm:$rc)>; - -def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2dq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), imm:$rc)), - (VCVTPD2DQZrrb VR512:$src, imm:$rc)>; - -defm VCVTPS2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtps2udq", VR512, VR512, - loadv16f32, f512mem, SSEPackedSingle>, - PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VCVTPD2UDQZ : avx512_vcvt_fp2int<0x79, "vcvtpd2udq", VR512, VR256X, - loadv8f64, f512mem, SSEPackedDouble>, VEX_W, - PS, EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat <(v16i32 (int_x86_avx512_mask_cvtps2udq_512 (v16f32 VR512:$src), - (v16i32 immAllZerosV), (i16 -1), imm:$rc)), - (VCVTPS2UDQZrrb VR512:$src, imm:$rc)>; - -def : Pat <(v8i32 (int_x86_avx512_mask_cvtpd2udq_512 (v8f64 VR512:$src), - (v8i32 immAllZerosV), (i8 -1), imm:$rc)), - (VCVTPD2UDQZrrb VR512:$src, imm:$rc)>; - let Predicates = [HasAVX512] in { def : Pat<(v8f32 (fround (loadv8f64 addr:$src))), (VCVTPD2PSZrm addr:$src)>; diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 6ab961f04ecf..4cd5563ce727 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -105,14 +105,16 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in // jecxz. let Uses = [CX] in def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jcxz\t$dst", [], IIC_JCXZ>, AdSize16; + "jcxz\t$dst", [], IIC_JCXZ>, AdSize16, + Requires<[Not64BitMode]>; let Uses = [ECX] in def JECXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), "jecxz\t$dst", [], IIC_JCXZ>, AdSize32; let Uses = [RCX] in def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst), - "jrcxz\t$dst", [], IIC_JCXZ>, AdSize64; + "jrcxz\t$dst", [], IIC_JCXZ>, AdSize64, + Requires<[In64BitMode]>; } // Indirect branches diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index fe245c3a7e38..1f61ffa84e9a 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -39,11 +39,6 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>, SDTCisVec<1>]>; -def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>; -def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>; -def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>; -def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>; - def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; @@ -75,6 +70,9 @@ def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD", SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>, SDTCisVT<1, v4i32>]>>; +def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD", + SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>, + SDTCisVT<1, v4i32>]>>; def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; @@ -187,6 +185,7 @@ def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>; def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>; def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>; +def X86mulhrs : SDNode<"X86ISD::MULHRS" , SDTIntBinOp>; def X86avg : SDNode<"X86ISD::AVG" , 
SDTIntBinOp>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; @@ -208,6 +207,14 @@ def X86pmuldq : SDNode<"X86ISD::PMULDQ", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>>; +def X86extrqi : SDNode<"X86ISD::EXTRQI", + SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, + SDTCisVT<2, i8>, SDTCisVT<3, i8>]>>; +def X86insertqi : SDNode<"X86ISD::INSERTQI", + SDTypeProfile<1, 4, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, + SDTCisSameAs<1,2>, SDTCisVT<3, i8>, + SDTCisVT<4, i8>]>>; + // Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get // translated into one of the target nodes below during lowering. // Note: this is a work in progress... @@ -357,8 +364,70 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1, def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>, SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; -def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>; -def X86SuintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>; +def SDTDoubleToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>; +def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>; + +def SDTDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>; +def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>; + +def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCVecEltisVT<1, i32>, + SDTCisInt<2>]>; +def SDTVlongToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCVecEltisVT<1, i64>, + SDTCisInt<2>]>; + +def SDTVFPToIntRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<1>, SDTCVecEltisVT<0, i32>, + SDTCisInt<2>]>; +def SDTVFPToLongRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<1>, SDTCVecEltisVT<0, i64>, + SDTCisInt<2>]>; + +// Scalar +def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>; +def X86UintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>; + +// Vector with rounding mode + +// cvtt fp-to-int staff +def X86VFpToSintRnd : SDNode<"ISD::FP_TO_SINT", SDTVFPToIntRound>; +def X86VFpToUintRnd : SDNode<"ISD::FP_TO_UINT", SDTVFPToIntRound>; +def X86VFpToSlongRnd : SDNode<"ISD::FP_TO_SINT", SDTVFPToLongRound>; +def X86VFpToUlongRnd : SDNode<"ISD::FP_TO_UINT", SDTVFPToLongRound>; + +def X86VSintToFpRnd : SDNode<"ISD::SINT_TO_FP", SDTVintToFPRound>; +def X86VUintToFpRnd : SDNode<"ISD::UINT_TO_FP", SDTVintToFPRound>; +def X86VSlongToFpRnd : SDNode<"ISD::SINT_TO_FP", SDTVlongToFPRound>; +def X86VUlongToFpRnd : SDNode<"ISD::UINT_TO_FP", SDTVlongToFPRound>; + +// cvt fp-to-int staff +def X86cvtps2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTFloatToIntRnd>; +def X86cvtps2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToIntRnd>; +def X86cvtpd2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToIntRnd>; +def X86cvtpd2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToIntRnd>; + +// Vector without rounding mode +def X86cvtps2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTFloatToInt>; +def X86cvtps2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToInt>; +def X86cvtpd2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToInt>; +def X86cvtpd2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToInt>; + +def X86vfpextRnd : SDNode<"X86ISD::VFPEXT", + 
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCisFP<1>, + SDTCisOpSmallerThanOp<1, 0>, + SDTCisInt<2>]>>; +def X86vfproundRnd: SDNode<"X86ISD::VFPROUND", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisFP<0>, SDTCisFP<1>, + SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, f64>, + SDTCisInt<2>]>>; //===----------------------------------------------------------------------===// // SSE Complex Patterns diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index b92ba99fb100..786150760b93 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -269,14 +269,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::XOR8rr, X86::XOR8mr, 0 } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2Addr); i != e; ++i) { - unsigned RegOp = MemoryFoldTable2Addr[i].RegOp; - unsigned MemOp = MemoryFoldTable2Addr[i].MemOp; - unsigned Flags = MemoryFoldTable2Addr[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) { AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable, - RegOp, MemOp, + Entry.RegOp, Entry.MemOp, // Index 0, folded load and store, no alignment requirement. - Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); + Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); } static const X86MemoryFoldTableEntry MemoryFoldTable0[] = { @@ -424,12 +421,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable0); i != e; ++i) { - unsigned RegOp = MemoryFoldTable0[i].RegOp; - unsigned MemOp = MemoryFoldTable0[i].MemOp; - unsigned Flags = MemoryFoldTable0[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) { AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable, - RegOp, MemOp, TB_INDEX_0 | Flags); + Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags); } static const X86MemoryFoldTableEntry MemoryFoldTable1[] = { @@ -862,14 +856,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable1); i != e; ++i) { - unsigned RegOp = MemoryFoldTable1[i].RegOp; - unsigned MemOp = MemoryFoldTable1[i].MemOp; - unsigned Flags = MemoryFoldTable1[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) { AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable, - RegOp, MemOp, + Entry.RegOp, Entry.MemOp, // Index 1, folded load - Flags | TB_INDEX_1 | TB_FOLDED_LOAD); + Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD); } static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { @@ -1116,6 +1107,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 }, { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 }, { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 }, + { X86::ROUNDSDr, X86::ROUNDSDm, 0 }, + { X86::ROUNDSSr, X86::ROUNDSSm, 0 }, { X86::SBB32rr, X86::SBB32rm, 0 }, { X86::SBB64rr, X86::SBB64rm, 0 }, { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 }, @@ -1412,6 +1405,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 }, { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 }, { X86::VPXORrr, X86::VPXORrm, 0 }, + { X86::VROUNDSDr, X86::VROUNDSDm, 0 }, + { X86::VROUNDSSr, X86::VROUNDSSm, 0 }, { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 }, { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 }, { X86::VSUBPDrr, X86::VSUBPDrm, 0 }, @@ -1733,14 +1728,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::SHA256RNDS2rr, 
X86::SHA256RNDS2rm, TB_ALIGN_16 } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable2); i != e; ++i) { - unsigned RegOp = MemoryFoldTable2[i].RegOp; - unsigned MemOp = MemoryFoldTable2[i].MemOp; - unsigned Flags = MemoryFoldTable2[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) { AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable, - RegOp, MemOp, + Entry.RegOp, Entry.MemOp, // Index 2, folded load - Flags | TB_INDEX_2 | TB_FOLDED_LOAD); + Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD); } static const X86MemoryFoldTableEntry MemoryFoldTable3[] = { @@ -1949,14 +1941,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable3); i != e; ++i) { - unsigned RegOp = MemoryFoldTable3[i].RegOp; - unsigned MemOp = MemoryFoldTable3[i].MemOp; - unsigned Flags = MemoryFoldTable3[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) { AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, - RegOp, MemOp, + Entry.RegOp, Entry.MemOp, // Index 3, folded load - Flags | TB_INDEX_3 | TB_FOLDED_LOAD); + Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD); } static const X86MemoryFoldTableEntry MemoryFoldTable4[] = { @@ -2001,14 +1990,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 } }; - for (unsigned i = 0, e = array_lengthof(MemoryFoldTable4); i != e; ++i) { - unsigned RegOp = MemoryFoldTable4[i].RegOp; - unsigned MemOp = MemoryFoldTable4[i].MemOp; - unsigned Flags = MemoryFoldTable4[i].Flags; + for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) { AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, - RegOp, MemOp, + Entry.RegOp, Entry.MemOp, // Index 4, folded load - Flags | TB_INDEX_4 | TB_FOLDED_LOAD); + Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD); } } @@ -3820,7 +3806,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, X86::MOVPQIto64rr); if (X86::VR64RegClass.contains(SrcReg)) // Copy from a VR64 register to a GR64 register. - return X86::MOVSDto64rr; + return X86::MMX_MOVD64from64rr; } else if (X86::GR64RegClass.contains(SrcReg)) { // Copy from a GR64 register to a VR128 register. if (X86::VR128XRegClass.contains(DestReg)) @@ -3828,7 +3814,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, X86::MOV64toPQIrr); // Copy from a GR64 register to a VR64 register. if (X86::VR64RegClass.contains(DestReg)) - return X86::MOV64toSDrr; + return X86::MMX_MOVD64to64rr; } // SrcReg(FR32) -> DestReg(GR32) @@ -6413,22 +6399,40 @@ static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) { hasVirtualRegDefsInBasicBlock(*MI1, MBB) && MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) return true; - + return false; } +// TODO: There are many more machine instruction opcodes to match: +// 1. Other data types (integer, vectors) +// 2. Other math / logic operations (and, or) +static bool isAssociativeAndCommutative(unsigned Opcode) { + switch (Opcode) { + case X86::ADDSDrr: + case X86::ADDSSrr: + case X86::VADDSDrr: + case X86::VADDSSrr: + case X86::MULSDrr: + case X86::MULSSrr: + case X86::VMULSDrr: + case X86::VMULSSrr: + return true; + default: + return false; + } +} + /// Return true if the input instruction is part of a chain of dependent ops /// that are suitable for reassociation, otherwise return false. /// If the instruction's operands must be commuted to have a previous /// instruction of the same type define the first source operand, Commuted will /// be set to true. 
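The machine-combiner hunks above generalize reassociation from VADDSSrr to the small whitelist in isAssociativeAndCommutative (scalar SSE/AVX add and multiply). The point of the transform, Prev: B = A op X; Root: C = B op Y rewritten as NewB = X op Y; C = A op NewB, is to take one operation off the critical path through A. A minimal illustration for floating-point add; note that FP reassociation can change rounding, which is why LLVM gates it on fast-math style settings:

    // Original chain: two adds serialized behind A.
    static float chainForm(float A, float X, float Y) {
      float B = A + X;  // Prev: depends on A
      return B + Y;     // Root: depends on B
    }

    // Reassociated: X + Y is independent of A and can issue early.
    static float reassociatedForm(float A, float X, float Y) {
      float NewB = X + Y;
      return A + NewB;  // only one add remains on A's critical path
    }

    int main() {
      // Equal for these inputs; not guaranteed equal for all FP inputs.
      return chainForm(1.0f, 2.0f, 3.0f) == reassociatedForm(1.0f, 2.0f, 3.0f) ? 0 : 1;
    }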
-static bool isReassocCandidate(const MachineInstr &Inst, unsigned AssocOpcode, - bool &Commuted) { - // 1. The instruction must have the correct type. +static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) { + // 1. The operation must be associative and commutative. // 2. The instruction must have virtual register definitions for its // operands in the same basic block. - // 3. The instruction must have a reassociatable sibling. - if (Inst.getOpcode() == AssocOpcode && + // 3. The instruction must have a reassociable sibling. + if (isAssociativeAndCommutative(Inst.getOpcode()) && hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) && hasReassocSibling(Inst, Commuted)) return true; @@ -6455,14 +6459,8 @@ bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root, // B = A op X (Prev) // C = B op Y (Root) - // TODO: There are many more associative instruction types to match: - // 1. Other forms of scalar FP add (non-AVX) - // 2. Other data types (double, integer, vectors) - // 3. Other math / logic operations (mul, and, or) - unsigned AssocOpcode = X86::VADDSSrr; - - bool Commute = false; - if (isReassocCandidate(Root, AssocOpcode, Commute)) { + bool Commute; + if (isReassocCandidate(Root, Commute)) { // We found a sequence of instructions that may be suitable for a // reassociation of operands to increase ILP. Specify each commutation // possibility for the Prev instruction in the sequence and let the @@ -6512,7 +6510,7 @@ static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]); MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]); MachineOperand &OpC = Root.getOperand(0); - + unsigned RegA = OpA.getReg(); unsigned RegB = OpB.getReg(); unsigned RegX = OpX.getReg(); @@ -6547,7 +6545,7 @@ static void reassociateOps(MachineInstr &Root, MachineInstr &Prev, .addReg(RegX, getKillRegState(KillX)) .addReg(RegY, getKillRegState(KillY)); InsInstrs.push_back(MIB1); - + MachineInstrBuilder MIB2 = BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) .addReg(RegA, getKillRegState(KillA)) @@ -6579,7 +6577,7 @@ void X86InstrInfo::genAlternativeCodeSequence( Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); } assert(Prev && "Unknown pattern for machine combiner"); - + reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); return; } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 6f38cb8eaf33..52bab9c79b45 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -194,7 +194,7 @@ def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void, def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; -def X86RecoverFrameAlloc : SDNode<"ISD::FRAME_ALLOC_RECOVER", +def X86RecoverFrameAlloc : SDNode<"ISD::LOCAL_RECOVER", SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisInt<1>]>>; @@ -1028,14 +1028,13 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[], IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>; def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm), - "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16, - Requires<[Not64BitMode]>; + "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16; +def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16; + def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[Not64BitMode]>; -def PUSHi16 
: Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), - "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16, - Requires<[Not64BitMode]>; def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[Not64BitMode]>; @@ -1081,9 +1080,6 @@ let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in { def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>, Requires<[In64BitMode]>; -def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), - "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16, - Requires<[In64BitMode]>; def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>, OpSize32, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 2a896dfe8aa8..a5ff9edf05a3 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4035,13 +4035,13 @@ defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, SSE_INTALU_ITINS_P, 0>; defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, SSE_INTALU_ITINS_P, 0>; -defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8, +defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8, SSE_INTALU_ITINS_P, 1>; -defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16, +defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16, SSE_INTALU_ITINS_P, 1>; -defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8, +defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8, SSE_INTALU_ITINS_P, 1>; -defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16, +defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16, SSE_INTALU_ITINS_P, 1>; // Intrinsic forms @@ -6834,29 +6834,28 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, } let Predicates = [HasAVX, NoVLX] in { - let isCommutable = 0 in - defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128, + defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128, + defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128, + defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128, + defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128, + defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128, + defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128, + defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128, + defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; defm VPMULDQ : 
SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32, @@ -6865,29 +6864,28 @@ let Predicates = [HasAVX, NoVLX] in { } let Predicates = [HasAVX2, NoVLX] in { - let isCommutable = 0 in - defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256, + defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256, + defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256, + defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256, + defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256, + defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256, + defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256, + defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256, + defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32, @@ -6896,22 +6894,21 @@ let Predicates = [HasAVX2, NoVLX] in { } let Constraints = "$src1 = $dst" in { - let isCommutable = 0 in - defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128, + defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128, + defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128, + defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128, + defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128, + defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128, + defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128, + defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128, + defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMULDQ : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32, VR128, 
memopv2i64, i128mem, @@ -7773,7 +7770,7 @@ let Constraints = "$src = $dst" in { def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), (ins VR128:$src, u8imm:$len, u8imm:$idx), "extrq\t{$idx, $len, $src|$src, $len, $idx}", - [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len, + [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len, imm:$idx))]>, PD; def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), @@ -7784,8 +7781,8 @@ def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx), "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", - [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src, - VR128:$src2, imm:$len, imm:$idx))]>, XD; + [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2, + imm:$len, imm:$idx))]>, XD; def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), "insertq\t{$mask, $src|$src, $mask}", diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index 61a33484b8bf..2c8b95bcba22 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -19,7 +19,7 @@ namespace llvm { enum IntrinsicType { INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, - INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, + INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_3OP_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK, @@ -213,18 +213,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0), X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0), X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0), - X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx2_pmaxs_b, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxs_d, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxu_d, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx2_pmaxu_w, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx2_pmins_b, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx2_pmins_d, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, ISD::UMIN, 0), 
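The hunks above retire the X86-private min/max SelectionDAG nodes in favour of the generic ISD::SMAX, ISD::SMIN, ISD::UMAX and ISD::UMIN opcodes, both in the TableGen patterns (X86smin becomes smin, and so on) and in the intrinsic lowering table. As a minimal sketch of the per-lane semantics those generic nodes model (an illustrative helper only, not LLVM code, and no claim about how the backend itself lowers them):

#include <algorithm>
#include <array>
#include <cstddef>

// Per-lane behaviour modelled by ISD::SMAX / ISD::UMAX and friends:
// an ordinary element-wise max whose signedness comes from the lane type T.
template <typename T, std::size_t N>
std::array<T, N> lanewiseMax(const std::array<T, N> &A,
                             const std::array<T, N> &B) {
  std::array<T, N> R{};
  for (std::size_t I = 0; I != N; ++I)
    R[I] = std::max(A[I], B[I]);
  return R;
}

// e.g. lanewiseMax<int16_t, 8> corresponds to pmaxsw and
//      lanewiseMax<uint8_t, 16> to pmaxub.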
X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0), @@ -596,60 +596,69 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, X86ISD::SMIN, 0), - 
X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + 
X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), + X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK, X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_128, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_256, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_512, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulh_w_128, INTR_TYPE_2OP_MASK, ISD::MULHS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulh_w_256, INTR_TYPE_2OP_MASK, ISD::MULHS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulh_w_512, INTR_TYPE_2OP_MASK, ISD::MULHS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_128, INTR_TYPE_2OP_MASK, ISD::MULHU, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_256, INTR_TYPE_2OP_MASK, ISD::MULHU, 0), + X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_512, INTR_TYPE_2OP_MASK, ISD::MULHU, 0), X86_INTRINSIC_DATA(avx512_mask_pmull_d_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0), X86_INTRINSIC_DATA(avx512_mask_pmull_d_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0), X86_INTRINSIC_DATA(avx512_mask_pmull_d_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0), @@ -1008,10 +1017,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(sse2_pmaxs_w, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0), X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), X86_INTRINSIC_DATA(sse2_pmulhu_w, 
INTR_TYPE_2OP, ISD::MULHU, 0), X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0), @@ -1049,14 +1058,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse3_hsub_ps, INTR_TYPE_2OP, X86ISD::FHSUB, 0), X86_INTRINSIC_DATA(sse41_insertps, INTR_TYPE_3OP, X86ISD::INSERTPS, 0), X86_INTRINSIC_DATA(sse41_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, X86ISD::SMAX, 0), - X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, X86ISD::UMAX, 0), - X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, X86ISD::SMIN, 0), - X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, X86ISD::UMIN, 0), - X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, X86ISD::UMIN, 0), + X86_INTRINSIC_DATA(sse41_pmaxsb, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(sse41_pmaxsd, INTR_TYPE_2OP, ISD::SMAX, 0), + X86_INTRINSIC_DATA(sse41_pmaxud, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(sse41_pmaxuw, INTR_TYPE_2OP, ISD::UMAX, 0), + X86_INTRINSIC_DATA(sse41_pminsb, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(sse41_pminsd, INTR_TYPE_2OP, ISD::SMIN, 0), + X86_INTRINSIC_DATA(sse41_pminud, INTR_TYPE_2OP, ISD::UMIN, 0), + X86_INTRINSIC_DATA(sse41_pminuw, INTR_TYPE_2OP, ISD::UMIN, 0), X86_INTRINSIC_DATA(sse41_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(sse41_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0), X86_INTRINSIC_DATA(sse41_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0), @@ -1070,6 +1079,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse41_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0), X86_INTRINSIC_DATA(sse41_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0), X86_INTRINSIC_DATA(sse41_pmuldq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0), + X86_INTRINSIC_DATA(sse4a_extrqi, INTR_TYPE_3OP, X86ISD::EXTRQI, 0), + X86_INTRINSIC_DATA(sse4a_insertqi, INTR_TYPE_4OP, X86ISD::INSERTQI, 0), X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ), X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE), X86_INTRINSIC_DATA(sse_comigt_ss, COMI, X86ISD::COMI, ISD::SETGT), diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index d598b55aae3e..e6db9708b677 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -30,59 +30,67 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// pointer for reasons other than it containing dynamic allocation or /// that FP eliminatation is turned off. For example, Cygwin main function /// contains stack pointer re-alignment code which requires FP. - bool ForceFramePointer; + bool ForceFramePointer = false; /// RestoreBasePointerOffset - Non-zero if the function has base pointer /// and makes call to llvm.eh.sjlj.setjmp. When non-zero, the value is a /// displacement from the frame pointer to a slot where the base pointer /// is stashed. - signed char RestoreBasePointerOffset; + signed char RestoreBasePointerOffset = 0; /// CalleeSavedFrameSize - Size of the callee-saved register portion of the /// stack frame in bytes. - unsigned CalleeSavedFrameSize; + unsigned CalleeSavedFrameSize = 0; /// BytesToPopOnReturn - Number of bytes function pops on return (in addition /// to the space used by the return address). 
/// Used on windows platform for stdcall & fastcall name decoration - unsigned BytesToPopOnReturn; + unsigned BytesToPopOnReturn = 0; /// ReturnAddrIndex - FrameIndex for return slot. - int ReturnAddrIndex; + int ReturnAddrIndex = 0; /// \brief FrameIndex for return slot. - int FrameAddrIndex; + int FrameAddrIndex = 0; /// TailCallReturnAddrDelta - The number of bytes by which return address /// stack slot is moved as the result of tail call optimization. - int TailCallReturnAddrDelta; + int TailCallReturnAddrDelta = 0; /// SRetReturnReg - Some subtargets require that sret lowering includes /// returning the value of the returned struct in a register. This field /// holds the virtual register into which the sret argument is passed. - unsigned SRetReturnReg; + unsigned SRetReturnReg = 0; /// GlobalBaseReg - keeps track of the virtual register initialized for /// use as the global base register. This is used for PIC in some PIC /// relocation models. - unsigned GlobalBaseReg; + unsigned GlobalBaseReg = 0; /// VarArgsFrameIndex - FrameIndex for start of varargs area. - int VarArgsFrameIndex; + int VarArgsFrameIndex = 0; /// RegSaveFrameIndex - X86-64 vararg func register save area. - int RegSaveFrameIndex; + int RegSaveFrameIndex = 0; /// VarArgsGPOffset - X86-64 vararg func int reg offset. - unsigned VarArgsGPOffset; + unsigned VarArgsGPOffset = 0; /// VarArgsFPOffset - X86-64 vararg func fp reg offset. - unsigned VarArgsFPOffset; + unsigned VarArgsFPOffset = 0; /// ArgumentStackSize - The number of bytes on stack consumed by the arguments /// being passed on the stack. - unsigned ArgumentStackSize; + unsigned ArgumentStackSize = 0; /// NumLocalDynamics - Number of local-dynamic TLS accesses. - unsigned NumLocalDynamics; + unsigned NumLocalDynamics = 0; /// HasPushSequences - Keeps track of whether this function uses sequences /// of pushes to pass function parameters. - bool HasPushSequences; + bool HasPushSequences = false; + + /// True if the function uses llvm.x86.seh.restoreframe, and it needed a spill + /// slot for the frame pointer. + bool HasSEHFramePtrSave = false; + + /// The frame index of a stack object containing the original frame pointer + /// used to address arguments in a function using a base pointer. 
+ int SEHFramePtrSaveIndex = 0; private: /// ForwardedMustTailRegParms - A list of virtual and physical registers @@ -90,40 +98,9 @@ private: SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms; public: - X86MachineFunctionInfo() : ForceFramePointer(false), - RestoreBasePointerOffset(0), - CalleeSavedFrameSize(0), - BytesToPopOnReturn(0), - ReturnAddrIndex(0), - FrameAddrIndex(0), - TailCallReturnAddrDelta(0), - SRetReturnReg(0), - GlobalBaseReg(0), - VarArgsFrameIndex(0), - RegSaveFrameIndex(0), - VarArgsGPOffset(0), - VarArgsFPOffset(0), - ArgumentStackSize(0), - NumLocalDynamics(0), - HasPushSequences(false) {} - - explicit X86MachineFunctionInfo(MachineFunction &MF) - : ForceFramePointer(false), - RestoreBasePointerOffset(0), - CalleeSavedFrameSize(0), - BytesToPopOnReturn(0), - ReturnAddrIndex(0), - FrameAddrIndex(0), - TailCallReturnAddrDelta(0), - SRetReturnReg(0), - GlobalBaseReg(0), - VarArgsFrameIndex(0), - RegSaveFrameIndex(0), - VarArgsGPOffset(0), - VarArgsFPOffset(0), - ArgumentStackSize(0), - NumLocalDynamics(0), - HasPushSequences(false) {} + X86MachineFunctionInfo() = default; + + explicit X86MachineFunctionInfo(MachineFunction &MF) {}; bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -174,6 +151,12 @@ public: unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + bool getHasSEHFramePtrSave() const { return HasSEHFramePtrSave; } + void setHasSEHFramePtrSave(bool V) { HasSEHFramePtrSave = V; } + + int getSEHFramePtrSaveIndex() const { return SEHFramePtrSaveIndex; } + void setSEHFramePtrSaveIndex(int Index) { SEHFramePtrSaveIndex = Index; } + SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() { return ForwardedMustTailRegParms; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 0033b5058187..d8495e53e0e3 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -202,7 +202,7 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { unsigned X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const X86FrameLowering *TFI = getFrameLowering(MF); unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; switch (RC->getID()) { @@ -343,7 +343,7 @@ X86RegisterInfo::getNoPreservedMask() const { BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const X86FrameLowering *TFI = getFrameLowering(MF); // Set the stack-pointer register and its aliases as reserved. for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); @@ -452,7 +452,7 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { // use both the SP and the FP, we need a separate base pointer register. 
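The X86MachineFunctionInfo.h hunks above move every field to a C++11 in-class default member initializer, which is what lets the two long hand-written constructor initializer lists collapse to = default and an empty body. A hedged, standalone illustration of that idiom (the struct and field names here are invented, not the real class):

// Illustrative only: with default member initializers, both constructors
// leave the object in the same zeroed state the old initializer lists produced.
struct MFIExample {
  bool ForceFP = false;
  unsigned SavedFrameSize = 0;
  int RetAddrIdx = 0;

  MFIExample() = default;                 // members already initialized above
  explicit MFIExample(int /*unused*/) {}  // no initializer list needed either
};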
bool CantUseFP = needsStackRealignment(MF); bool CantUseSP = - MFI->hasVarSizedObjects() || MFI->hasInlineAsmWithSPAdjust(); + MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment(); return CantUseFP && CantUseSP; } @@ -477,9 +477,9 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86FrameLowering *TFI = getFrameLowering(MF); const Function *F = MF.getFunction(); - unsigned StackAlign = - MF.getSubtarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->hasFnAttribute(Attribute::StackAlignment)); @@ -503,7 +503,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); unsigned BasePtr; @@ -519,18 +519,17 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, else BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); - // FRAME_ALLOC uses a single offset, with no register. It only works in the + // LOCAL_ESCAPE uses a single offset, with no register. It only works in the // simple FP case, and doesn't work with stack realignment. On 32-bit, the // offset is from the traditional base pointer location. On 64-bit, the // offset is from the SP at the end of the prologue, not the FP location. This // matches the behavior of llvm.frameaddress. - if (Opc == TargetOpcode::FRAME_ALLOC) { + if (Opc == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); int Offset; if (IsWinEH) - Offset = static_cast<const X86FrameLowering *>(TFI) - ->getFrameIndexOffsetFromSP(MF, FrameIndex); + Offset = TFI->getFrameIndexOffsetFromSP(MF, FrameIndex); else Offset = TFI->getFrameIndexOffset(MF, FrameIndex); FI.ChangeToImmediate(Offset); @@ -584,7 +583,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const X86FrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? FramePtr : StackPtr; } diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 5ca40bc0091b..ce79fcf9ad81 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -24,11 +24,6 @@ using namespace llvm; #define DEBUG_TYPE "x86-selectiondag-info" -X86SelectionDAGInfo::X86SelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -X86SelectionDAGInfo::~X86SelectionDAGInfo() {} - bool X86SelectionDAGInfo::isBaseRegConflictPossible( SelectionDAG &DAG, ArrayRef<unsigned> ClobberSet) const { // We cannot use TRI->hasBasePointer() until *after* we select all basic @@ -37,7 +32,7 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( // dynamic stack adjustments (hopefully rare) and the base pointer would // conflict if we had to use it. 
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - if (!MFI->hasVarSizedObjects() && !MFI->hasInlineAsmWithSPAdjust()) + if (!MFI->hasVarSizedObjects() && !MFI->hasOpaqueSPAdjustment()) return false; const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>( @@ -81,8 +76,9 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { - EVT IntPtr = DAG.getTargetLoweringInfo().getPointerTy(); - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + EVT IntPtr = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h index eb7e0ed9de6c..961bd8c8d5ef 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.h +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -29,8 +29,7 @@ class X86SelectionDAGInfo : public TargetSelectionDAGInfo { ArrayRef<unsigned> ClobberSet) const; public: - explicit X86SelectionDAGInfo(const DataLayout &DL); - ~X86SelectionDAGInfo(); + explicit X86SelectionDAGInfo() = default; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 3b25d30dc221..dff3624b7efe 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -68,7 +68,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->hasDLLImportStorageClass()) return X86II::MO_DLLIMPORT; - bool isDecl = GV->isDeclarationForLinker(); + bool isDef = GV->isStrongDefinitionForLinker(); // X86-64 in PIC mode. if (isPICStyleRIPRel()) { @@ -80,8 +80,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // If symbol visibility is hidden, the extra load is not needed if // target is x86-64 or the symbol is definitely defined in the current // translation unit. - if (GV->hasDefaultVisibility() && - (isDecl || GV->isWeakForLinker())) + if (GV->hasDefaultVisibility() && !isDef) return X86II::MO_GOTPCREL; } else if (!isTargetWin64()) { assert(isTargetELF() && "Unknown rip-relative target"); @@ -107,7 +106,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // If this is a strong reference to a definition, it is definitely not // through a stub. - if (!isDecl && !GV->isWeakForLinker()) + if (isDef) return X86II::MO_PIC_BASE_OFFSET; // Unless we have a symbol with hidden visibility, we have to go through a @@ -117,7 +116,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // If symbol visibility is hidden, we have a stub for common symbol // references and external declarations. - if (isDecl || GV->hasCommonLinkage()) { + if (GV->isDeclarationForLinker() || GV->hasCommonLinkage()) { // Hidden $non_lazy_ptr reference. return X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE; } @@ -131,7 +130,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // If this is a strong reference to a definition, it is definitely not // through a stub. 
- if (!isDecl && !GV->isWeakForLinker()) + if (isDef) return X86II::MO_NO_FLAG; // Unless we have a symbol with hidden visibility, we have to go through a @@ -193,12 +192,9 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { FullFS = "+64bit,+sse2"; } - // If feature string is not empty, parse features string. + // Parse features string and set the CPU. ParseSubtargetFeatures(CPUName, FullFS); - // Make sure the right MCSchedModel is used. - InitCPUSchedModel(CPUName); - InstrItins = getInstrItineraryForCPU(CPUName); // It's important to keep the MCSubtargetInfo feature bits in sync with @@ -298,9 +294,8 @@ X86Subtarget::X86Subtarget(const Triple &TT, const std::string &CPU, TargetTriple.getEnvironment() != Triple::CODE16), In16BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() == Triple::CODE16), - TSInfo(*TM.getDataLayout()), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), - FrameLowering(*this, getStackAlignment()) { + TSInfo(), InstrInfo(initializeSubtargetDependencies(CPU, FS)), + TLInfo(TM, *this), FrameLowering(*this, getStackAlignment()) { // Determine the PICStyle based on the target selected. if (TM.getRelocationModel() == Reloc::Static) { // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index d420abbe1433..f026d4295f71 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -447,8 +447,26 @@ public: } bool isCallingConvWin64(CallingConv::ID CC) const { - return (isTargetWin64() && CC != CallingConv::X86_64_SysV) || - CC == CallingConv::X86_64_Win64; + switch (CC) { + // On Win64, all these conventions just use the default convention. + case CallingConv::C: + case CallingConv::Fast: + case CallingConv::X86_FastCall: + case CallingConv::X86_StdCall: + case CallingConv::X86_ThisCall: + case CallingConv::X86_VectorCall: + case CallingConv::Intel_OCL_BI: + return isTargetWin64(); + // This convention allows using the Win64 convention on other targets. + case CallingConv::X86_64_Win64: + return true; + // This convention allows using the SysV convention on Windows targets. + case CallingConv::X86_64_SysV: + return false; + // Otherwise, who knows what this is. + default: + return false; + } } /// ClassifyGlobalReference - Classify a global variable reference for the diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 0c82a700952b..7df726091843 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -89,7 +89,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost( TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -117,6 +117,8 @@ unsigned X86TTIImpl::getArithmeticInstrCost( static const CostTblEntry<MVT::SimpleValueType> AVX2UniformConstCostTable[] = { + { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle. + { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence @@ -211,6 +213,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. 
{ ISD::SRA, MVT::v8i16, 1 }, // psraw. { ISD::SRA, MVT::v4i32, 1 }, // psrad. + { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence @@ -261,12 +264,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRL, MVT::v4i32, 4*10 }, // Scalarized. + { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized. { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. - { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized. + { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized. // It is not a good idea to vectorize division. We have to scalarize it and @@ -352,7 +355,7 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); if (Kind == TTI::SK_Reverse) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); unsigned Cost = 1; if (LT.second.getSizeInBits() > 128) Cost = 3; // Extract + insert + copy. @@ -364,7 +367,7 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, if (Kind == TTI::SK_Alternate) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); // The backend knows how to generate a single VEX.256 version of // instruction VPBLENDW if the target supports AVX2. @@ -464,8 +467,8 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src); - std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst); + std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src); + std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst); static const TypeConversionCostTblEntry<MVT::SimpleValueType> SSE2ConvTbl[] = { @@ -537,8 +540,8 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { if (Idx != -1) return AVX512ConversionTbl[Idx].Cost; } - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); + EVT SrcTy = TLI->getValueType(DL, Src); + EVT DstTy = TLI->getValueType(DL, Dst); // The function getSimpleVT only handles simple value types. if (!SrcTy.isSimple() || !DstTy.isSimple()) @@ -667,7 +670,7 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -740,7 +743,7 @@ unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, if (Index != -1U) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. 
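The cost-table updates above (for instance the new ISD::SRA on MVT::v2i64 entry priced as two psrad plus a shuffle, and the v4i32 shifts re-priced as shift-each-lane-plus-blend rather than scalarized) all feed the same mechanism: legalize the type, then look the (opcode, legal MVT) pair up in a small static table, falling back to a generic estimate on a miss. A minimal sketch of that lookup shape, with invented names standing in for the LLVM types:

// Illustrative stand-ins for the (opcode, value type) -> cost tables above.
enum class OpKind { SRA, SRL, SDIV };
enum class SimpleVT { v8i16, v4i32, v2i64 };

struct CostEntry { OpKind Op; SimpleVT VT; unsigned Cost; };

static const CostEntry SSE2CostTable[] = {
    {OpKind::SRA, SimpleVT::v4i32, 1},  // psrad
    {OpKind::SRA, SimpleVT::v2i64, 4},  // 2 x psrad + shuffle
};

unsigned lookupCost(OpKind Op, SimpleVT VT, unsigned GenericCost) {
  for (const CostEntry &E : SSE2CostTable)
    if (E.Op == Op && E.VT == VT)
      return E.Cost;
  return GenericCost;  // no table entry: use the generic estimate
}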
if (!LT.second.isVector()) @@ -803,7 +806,7 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, } // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); @@ -850,9 +853,9 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, } // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(SrcVTy); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy); unsigned Cost = 0; - if (LT.second != TLI->getValueType(SrcVTy).getSimpleVT() && + if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() && LT.second.getVectorNumElements() == NumElem) // Promotion requires expand/truncate for data and a shuffle for mask. Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) + @@ -887,7 +890,7 @@ unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, bool IsPairwise) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -1117,11 +1120,11 @@ unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) { int DataWidth = DataTy->getPrimitiveSizeInBits(); - + // Todo: AVX512 allows gather/scatter, works with strided and random as well if ((DataWidth < 32) || (Consecutive == 0)) return false; - if (ST->hasAVX512() || ST->hasAVX2()) + if (ST->hasAVX512() || ST->hasAVX2()) return true; return false; } diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index a83158440193..da3f36c2e27e 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -40,7 +40,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> { public: explicit X86TTIImpl(const X86TargetMachine *TM, Function &F) - : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. 
X86TTIImpl(const X86TTIImpl &Arg) @@ -48,18 +49,6 @@ public: X86TTIImpl(X86TTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - X86TTIImpl &operator=(const X86TTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - X86TTIImpl &operator=(X86TTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } /// \name Scalar TTI Implementations /// @{ diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 90357257b9ef..9190d0be9e4d 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -113,8 +113,8 @@ char WinEHStatePass::ID = 0; bool WinEHStatePass::doInitialization(Module &M) { TheModule = &M; - FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::frameescape); - FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::framerecover); + FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::localescape); + FrameRecover = Intrinsic::getDeclaration(TheModule, Intrinsic::localrecover); FrameAddress = Intrinsic::getDeclaration(TheModule, Intrinsic::frameaddress); return false; } @@ -133,7 +133,7 @@ bool WinEHStatePass::doFinalization(Module &M) { void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const { // This pass should only insert a stack allocation, memory accesses, and - // framerecovers. + // localrecovers. AU.setPreservesCFG(); } @@ -336,9 +336,11 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { FunctionType *TargetFuncTy = FunctionType::get(Int32Ty, makeArrayRef(&ArgTys[0], 5), /*isVarArg=*/false); - Function *Trampoline = Function::Create( - TrampolineTy, GlobalValue::InternalLinkage, - Twine("__ehhandler$") + ParentFunc->getName(), TheModule); + Function *Trampoline = + Function::Create(TrampolineTy, GlobalValue::InternalLinkage, + Twine("__ehhandler$") + GlobalValue::getRealLinkageName( + ParentFunc->getName()), + TheModule); BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline); IRBuilder<> Builder(EntryBB); Value *LSDA = emitEHLSDA(Builder, ParentFunc); @@ -419,14 +421,14 @@ void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) { } /// Escape RegNode so that we can access it from child handlers. Find the call -/// to frameescape, if any, in the entry block and append RegNode to the list +/// to localescape, if any, in the entry block and append RegNode to the list /// of arguments. int WinEHStatePass::escapeRegNode(Function &F) { - // Find the call to frameescape and extract its arguments. + // Find the call to localescape and extract its arguments. IntrinsicInst *EscapeCall = nullptr; for (Instruction &I : F.getEntryBlock()) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); - if (II && II->getIntrinsicID() == Intrinsic::frameescape) { + if (II && II->getIntrinsicID() == Intrinsic::localescape) { EscapeCall = II; break; } @@ -440,8 +442,10 @@ int WinEHStatePass::escapeRegNode(Function &F) { // Replace the call (if it exists) with new one. Otherwise, insert at the end // of the entry block. - IRBuilder<> Builder(&F.getEntryBlock(), - EscapeCall ? 
EscapeCall : F.getEntryBlock().end()); + Instruction *InsertPt = EscapeCall; + if (!EscapeCall) + InsertPt = F.getEntryBlock().getTerminator(); + IRBuilder<> Builder(&F.getEntryBlock(), InsertPt); Builder.CreateCall(FrameEscape, Args); if (EscapeCall) EscapeCall->eraseFromParent(); @@ -520,6 +524,11 @@ void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) { for (auto &Handler : ActionList) { if (auto *CH = dyn_cast<CatchHandler>(Handler.get())) { auto *BA = cast<BlockAddress>(CH->getHandlerBlockOrFunc()); +#ifndef NDEBUG + for (BasicBlock *Pred : predecessors(BA->getBasicBlock())) + assert(Pred->isLandingPad() && + "WinEHPrepare failed to split block"); +#endif ExceptBlocks.insert(BA->getBasicBlock()); } } diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index ac954d0a8fa4..b4085835f285 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -40,7 +40,7 @@ static MCInstrInfo *createXCoreMCInstrInfo() { return X; } -static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) { +static MCRegisterInfo *createXCoreMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitXCoreMCRegisterInfo(X, XCore::LR); return X; @@ -48,9 +48,7 @@ static MCRegisterInfo *createXCoreMCRegisterInfo(StringRef TT) { static MCSubtargetInfo * createXCoreMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitXCoreMCSubtargetInfo(X, TT, CPU, FS); - return X; + return createXCoreMCSubtargetInfoImpl(TT, CPU, FS); } static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, @@ -64,7 +62,8 @@ static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, return MAI; } -static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM, +static MCCodeGenInfo *createXCoreMCCodeGenInfo(const Triple &TT, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index bd834cc5be4b..76c3d8130e75 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -525,12 +525,15 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -void XCoreFrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { +void XCoreFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); - bool LRUsed = MF.getRegInfo().isPhysRegUsed(XCore::LR); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + bool LRUsed = MRI.isPhysRegModified(XCore::LR); if (!LRUsed && !MF.getFunction()->isVarArg() && MF.getFrameInfo()->estimateStackSize(MF)) @@ -550,7 +553,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (LRUsed) { // We will handle the LR in the prologue/epilogue // and allocate space on the stack ourselves. 
- MF.getRegInfo().setPhysRegUnused(XCore::LR); + SavedRegs.reset(XCore::LR); XFI->createLRSpillSlot(MF); } diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index 607c77248952..69c71adc8d3f 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -47,8 +47,8 @@ namespace llvm { bool hasFP(const MachineFunction &MF) const override; - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = nullptr) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const override; diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index f5b180b1ac0d..9d4a966dfba4 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -144,10 +144,9 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { MVT::i32, MskSize); } else if (!isUInt<16>(Val)) { - SDValue CPIdx = - CurDAG->getTargetConstantPool(ConstantInt::get( - Type::getInt32Ty(*CurDAG->getContext()), Val), - getTargetLowering()->getPointerTy()); + SDValue CPIdx = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), + getTargetLowering()->getPointerTy(CurDAG->getDataLayout())); SDNode *node = CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, MVT::Other, CPIdx, CurDAG->getEntryNode()); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index aa71241102ff..d62e7428299d 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -281,7 +281,8 @@ static bool IsSmallObject(const GlobalValue *GV, const XCoreTargetLowering &XTL) if (!ObjType->isSized()) return false; - unsigned ObjSize = XTL.getDataLayout()->getTypeAllocSize(ObjType); + auto &DL = GV->getParent()->getDataLayout(); + unsigned ObjSize = DL.getTypeAllocSize(ObjType); return ObjSize < CodeModelLargeSize && ObjSize != 0; } @@ -312,8 +313,9 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const Constant *GAI = ConstantExpr::getGetElementPtr( Type::getInt8Ty(*DAG.getContext()), GA, Idx); SDValue CP = DAG.getConstantPool(GAI, MVT::i32); - return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP, - MachinePointerInfo(), false, false, false, 0); + return DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, + DAG.getEntryNode(), CP, MachinePointerInfo(), false, + false, false, 0); } } @@ -321,11 +323,11 @@ SDValue XCoreTargetLowering:: LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); - + auto PtrVT = getPointerTy(DAG.getDataLayout()); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); - SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy()); + SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT); - return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result); + return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, PtrVT, Result); } SDValue XCoreTargetLowering:: @@ -378,9 +380,10 @@ SDValue XCoreTargetLowering:: lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base, int64_t Offset, SelectionDAG &DAG) const { + auto PtrVT = getPointerTy(DAG.getDataLayout()); if ((Offset & 0x3) == 0) { - return DAG.getLoad(getPointerTy(), DL, Chain, Base, MachinePointerInfo(), - false, false, false, 0); + return DAG.getLoad(PtrVT, DL, Chain, Base, 
MachinePointerInfo(), false, + false, false, 0); } // Lower to pair of consecutive word aligned loads plus some bit shifting. int32_t HighOffset = RoundUpToAlignment(Offset, 4); @@ -401,11 +404,9 @@ lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base, SDValue LowShift = DAG.getConstant((Offset - LowOffset) * 8, DL, MVT::i32); SDValue HighShift = DAG.getConstant((HighOffset - Offset) * 8, DL, MVT::i32); - SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, - LowAddr, MachinePointerInfo(), + SDValue Low = DAG.getLoad(PtrVT, DL, Chain, LowAddr, MachinePointerInfo(), false, false, false, 0); - SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, - HighAddr, MachinePointerInfo(), + SDValue High = DAG.getLoad(PtrVT, DL, Chain, HighAddr, MachinePointerInfo(), false, false, false, 0); SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); @@ -435,8 +436,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { LD->getAlignment())) return SDValue(); - unsigned ABIAlignment = getDataLayout()-> - getABITypeAlignment(LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); + auto &TD = DAG.getDataLayout(); + unsigned ABIAlignment = TD.getABITypeAlignment( + LD->getMemoryVT().getTypeForEVT(*DAG.getContext())); // Leave aligned load alone. if (LD->getAlignment() >= ABIAlignment) return SDValue(); @@ -486,7 +488,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } // Lower to a call to __misaligned_load(BasePtr). - Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = TD.getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -495,10 +497,11 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(DL).setChain(Chain) - .setCallee(CallingConv::C, IntPtrTy, - DAG.getExternalSymbol("__misaligned_load", getPointerTy()), - std::move(Args), 0); + CLI.setDebugLoc(DL).setChain(Chain).setCallee( + CallingConv::C, IntPtrTy, + DAG.getExternalSymbol("__misaligned_load", + getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -516,8 +519,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const ST->getAlignment())) { return SDValue(); } - unsigned ABIAlignment = getDataLayout()-> - getABITypeAlignment(ST->getMemoryVT().getTypeForEVT(*DAG.getContext())); + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment( + ST->getMemoryVT().getTypeForEVT(*DAG.getContext())); // Leave aligned store alone. if (ST->getAlignment() >= ABIAlignment) { return SDValue(); @@ -545,7 +548,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const } // Lower to a call to __misaligned_store(BasePtr, Value). 
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -557,10 +560,11 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__misaligned_store", getPointerTy()), - std::move(Args), 0); + CLI.setDebugLoc(dl).setChain(Chain).setCallee( + CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__misaligned_store", + getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.second; @@ -833,9 +837,9 @@ LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); int FI = XFI->createLRSpillSlot(MF); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - return DAG.getLoad(getPointerTy(), SDLoc(Op), DAG.getEntryNode(), FIN, - MachinePointerInfo::getFixedStack(FI), false, false, - false, 0); + return DAG.getLoad( + getPointerTy(DAG.getDataLayout()), SDLoc(Op), DAG.getEntryNode(), FIN, + MachinePointerInfo::getFixedStack(FI), false, false, false, 0); } SDValue XCoreTargetLowering:: @@ -979,11 +983,10 @@ LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { if (N->getMemoryVT() == MVT::i32) { if (N->getAlignment() < 4) report_fatal_error("atomic load must be aligned"); - return DAG.getLoad(getPointerTy(), SDLoc(Op), N->getChain(), - N->getBasePtr(), N->getPointerInfo(), - N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getAlignment(), - N->getAAInfo(), N->getRanges()); + return DAG.getLoad(getPointerTy(DAG.getDataLayout()), SDLoc(Op), + N->getChain(), N->getBasePtr(), N->getPointerInfo(), + N->isVolatile(), N->isNonTemporal(), N->isInvariant(), + N->getAlignment(), N->getAAInfo(), N->getRanges()); } if (N->getMemoryVT() == MVT::i16) { if (N->getAlignment() < 2) @@ -1150,9 +1153,10 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = RetCCInfo.getNextStackOffset(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); - Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, dl, - getPointerTy(), true), dl); + Chain = DAG.getCALLSEQ_START(Chain, + DAG.getConstant(NumBytes, dl, PtrVT, true), dl); SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass; SmallVector<SDValue, 12> MemOpChains; @@ -1239,11 +1243,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. 
- Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, dl, getPointerTy(), - true), - DAG.getConstant(0, dl, getPointerTy(), true), - InFlag, dl); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, dl, PtrVT, true), + DAG.getConstant(0, dl, PtrVT, true), InFlag, dl); InFlag = Chain.getValue(1); // Handle result values, copying them out of physregs into vregs that we @@ -1830,7 +1831,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if (StoreBits % 8) { break; } - unsigned ABIAlignment = getDataLayout()->getABITypeAlignment( + unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment( ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext())); unsigned Alignment = ST->getAlignment(); if (Alignment >= ABIAlignment) { @@ -1924,15 +1925,13 @@ static inline bool isImmUs4(int64_t val) /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. -bool -XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty, - unsigned AS) const { +bool XCoreTargetLowering::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); - const DataLayout *TD = TM.getDataLayout(); - unsigned Size = TD->getTypeAllocSize(Ty); + unsigned Size = DL.getTypeAllocSize(Ty); if (AM.BaseGV) { return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs%4 == 0; @@ -1970,7 +1969,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, std::pair<unsigned, const TargetRegisterClass *> XCoreTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, + StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 97f0494b6fe3..ddd675c5164d 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -101,7 +101,9 @@ namespace llvm { unsigned getJumpTableEncoding() const override; - MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override { + return MVT::i32; + } /// LowerOperation - Provide custom lowering hooks for some operations. 
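The XCore hunks above all follow one mechanical pattern: TargetLowering::getPointerTy() and getScalarShiftAmountTy() now take the DataLayout, and SelectionDAG lowering code obtains it from DAG.getDataLayout() instead of going through the TargetMachine. A minimal sketch of the new shape in a hypothetical target; the class, function and variable names below are illustrative and not part of the patch:

SDValue MyTargetLowering::lowerExample(SDValue Op, SelectionDAG &DAG) const {
  const DataLayout &DL = DAG.getDataLayout();            // was: *TM.getDataLayout()
  EVT PtrVT = getPointerTy(DL);                          // was: getPointerTy()
  // e.g. build a libcall signature, as LowerLOAD/LowerSTORE do above.
  Type *IntPtrTy = DL.getIntPtrType(*DAG.getContext());
  (void)IntPtrTy;
  return DAG.getNode(ISD::ADD, SDLoc(Op), PtrVT, Op, Op);
}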
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; @@ -120,8 +122,8 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty, - unsigned AS) const override; + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS) const override; private: const TargetMachine &TM; @@ -175,8 +177,7 @@ namespace llvm { // Inline asm support std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - const std::string &Constraint, - MVT VT) const override; + StringRef Constraint, MVT VT) const override; // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 1d569e8936df..1cfb57dc3af3 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -222,7 +222,7 @@ XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { XCore::R8, XCore::R9, 0 }; - const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + const XCoreFrameLowering *TFI = getFrameLowering(*MF); if (TFI->hasFP(*MF)) return CalleeSavedRegsFP; return CalleeSavedRegs; @@ -230,7 +230,7 @@ XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const XCoreFrameLowering *TFI = getFrameLowering(MF); Reserved.set(XCore::CP); Reserved.set(XCore::DP); @@ -270,7 +270,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const XCoreInstrInfo &TII = *static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo()); - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const XCoreFrameLowering *TFI = getFrameLowering(MF); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); int StackSize = MF.getFrameInfo()->getStackSize(); @@ -324,7 +324,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const XCoreFrameLowering *TFI = getFrameLowering(MF); return TFI->hasFP(MF) ? 
XCore::R10 : XCore::SP; } diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp index a34884480cea..40568d124de0 100644 --- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp @@ -16,12 +16,6 @@ using namespace llvm; #define DEBUG_TYPE "xcore-selectiondag-info" -XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const DataLayout &DL) - : TargetSelectionDAGInfo(&DL) {} - -XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() { -} - SDValue XCoreSelectionDAGInfo:: EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, @@ -36,18 +30,20 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*DAG.getContext()); + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()), - std::move(Args), 0) - .setDiscardResult(); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__memcpy_4", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h index cfd80b3f3172..77b3527d77e3 100644 --- a/lib/Target/XCore/XCoreSelectionDAGInfo.h +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -22,8 +22,6 @@ class XCoreTargetMachine; class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit XCoreSelectionDAGInfo(const DataLayout &DL); - ~XCoreSelectionDAGInfo(); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp index c98518b60225..99ad2c88504f 100644 --- a/lib/Target/XCore/XCoreSubtarget.cpp +++ b/lib/Target/XCore/XCoreSubtarget.cpp @@ -28,4 +28,4 @@ void XCoreSubtarget::anchor() { } XCoreSubtarget::XCoreSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM) : XCoreGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(*this), - TLInfo(TM, *this), TSInfo(*TM.getDataLayout()) {} + TLInfo(TM, *this), TSInfo() {} diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 370b64b26688..f420081868f9 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -85,6 +85,7 @@ extern "C" void LLVMInitializeXCoreTarget() { } TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &) { return TargetTransformInfo(XCoreTTIImpl(this)); }); + return TargetIRAnalysis([this](Function &F) { + return TargetTransformInfo(XCoreTTIImpl(this, F)); + }); } diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.h b/lib/Target/XCore/XCoreTargetTransformInfo.h index 70b47dfa1156..e23aef3e3b4a 100644 --- 
a/lib/Target/XCore/XCoreTargetTransformInfo.h +++ b/lib/Target/XCore/XCoreTargetTransformInfo.h @@ -37,8 +37,9 @@ class XCoreTTIImpl : public BasicTTIImplBase<XCoreTTIImpl> { const XCoreTargetLowering *getTLI() const { return TLI; } public: - explicit XCoreTTIImpl(const XCoreTargetMachine *TM) - : BaseT(TM), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} + explicit XCoreTTIImpl(const XCoreTargetMachine *TM, Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), + TLI(ST->getTargetLowering()) {} // Provide value semantics. MSVC requires that we spell all of these out. XCoreTTIImpl(const XCoreTTIImpl &Arg) @@ -46,18 +47,6 @@ public: XCoreTTIImpl(XCoreTTIImpl &&Arg) : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} - XCoreTTIImpl &operator=(const XCoreTTIImpl &RHS) { - BaseT::operator=(static_cast<const BaseT &>(RHS)); - ST = RHS.ST; - TLI = RHS.TLI; - return *this; - } - XCoreTTIImpl &operator=(XCoreTTIImpl &&RHS) { - BaseT::operator=(std::move(static_cast<BaseT &>(RHS))); - ST = std::move(RHS.ST); - TLI = std::move(RHS.TLI); - return *this; - } unsigned getNumberOfRegisters(bool Vector) { if (Vector) { diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index f75436328252..4762011d63d8 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -825,7 +825,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, V = GetElementPtrInst::Create(SI->first, V, Ops, V->getName() + ".idx", Call); Ops.clear(); - AA.copyValue(OrigLoad->getOperand(0), V); } // Since we're replacing a load make sure we take the alignment // of the previous load. @@ -837,7 +836,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, newLoad->setAAMetadata(AAInfo); Args.push_back(newLoad); - AA.copyValue(OrigLoad, Args.back()); } } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 3df17b920a95..336dac45e13a 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMipo BarrierNoopPass.cpp ConstantMerge.cpp DeadArgumentElimination.cpp + ElimAvailExtern.cpp ExtractGV.cpp FunctionAttrs.cpp GlobalDCE.cpp diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 76898f275058..d0447640259e 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -326,7 +326,18 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { /// instead. bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) { - if (Fn.isDeclaration() || Fn.mayBeOverridden()) + // We cannot change the arguments if this TU does not define the function or + // if the linker may choose a function body from another TU, even if the + // nominal linkage indicates that other copies of the function have the same + // semantics. In the below example, the dead load from %p may not have been + // eliminated from the linker-chosen copy of f, so replacing %p with undef + // in callers may introduce undefined behavior. 
+ // + // define linkonce_odr void @f(i32* %p) { + // %v = load i32 %p + // ret void + // } + if (!Fn.isStrongDefinitionForLinker()) return false; // Functions with local linkage should already have been handled, except the @@ -334,19 +345,6 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg()) return false; - // If a function seen at compile time is not necessarily the one linked to - // the binary being built, it is illegal to change the actual arguments - // passed to it. These functions can be captured by isWeakForLinker(). - // *NOTE* that mayBeOverridden() is insufficient for this purpose as it - // doesn't include linkage types like AvailableExternallyLinkage and - // LinkOnceODRLinkage. Take link_odr* as an example, it indicates a set of - // *EQUIVALENT* globals that can be merged at link-time. However, the - // semantic of *EQUIVALENT*-functions includes parameters. Changing - // parameters breaks this assumption. - // - if (Fn.isWeakForLinker()) - return false; - if (Fn.use_empty()) return false; diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp new file mode 100644 index 000000000000..67ba72d6a360 --- /dev/null +++ b/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -0,0 +1,84 @@ +//===-- ElimAvailExtern.cpp - DCE unreachable internal functions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transform is designed to eliminate available external global +// definitions from the program, turning them into declarations. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/CtorUtils.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" +#include "llvm/Pass.h" +using namespace llvm; + +#define DEBUG_TYPE "elim-avail-extern" + +STATISTIC(NumFunctions, "Number of functions removed"); +STATISTIC(NumVariables, "Number of global variables removed"); + +namespace { + struct EliminateAvailableExternally : public ModulePass { + static char ID; // Pass identification, replacement for typeid + EliminateAvailableExternally() : ModulePass(ID) { + initializeEliminateAvailableExternallyPass( + *PassRegistry::getPassRegistry()); + } + + // run - Do the EliminateAvailableExternally pass on the specified module, + // optionally updating the specified callgraph to reflect the changes. + // + bool runOnModule(Module &M) override; + }; +} + +char EliminateAvailableExternally::ID = 0; +INITIALIZE_PASS(EliminateAvailableExternally, "elim-avail-extern", + "Eliminate Available Externally Globals", false, false) + +ModulePass *llvm::createEliminateAvailableExternallyPass() { + return new EliminateAvailableExternally(); +} + +bool EliminateAvailableExternally::runOnModule(Module &M) { + bool Changed = false; + + // Drop initializers of available externally global variables. 
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (!I->hasAvailableExternallyLinkage()) + continue; + if (I->hasInitializer()) { + Constant *Init = I->getInitializer(); + I->setInitializer(nullptr); + if (isSafeToDestroyConstant(Init)) + Init->destroyConstant(); + } + I->removeDeadConstantUsers(); + I->setLinkage(GlobalValue::ExternalLinkage); + NumVariables++; + } + + // Drop the bodies of available externally functions. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (!I->hasAvailableExternallyLinkage()) + continue; + if (!I->isDeclaration()) + // This will set the linkage to external + I->deleteBody(); + I->removeDeadConstantUsers(); + NumFunctions++; + } + + return Changed; +} diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 2f8c7d9349b9..b9462f2ffc72 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -93,8 +93,11 @@ namespace { makeVisible(*I, Delete); - if (Delete) + if (Delete) { + // Make this a declaration and drop it's comdat. I->setInitializer(nullptr); + I->setComdat(nullptr); + } } // Visit the Functions. @@ -108,8 +111,11 @@ namespace { makeVisible(*I, Delete); - if (Delete) + if (Delete) { + // Make this a declaration and drop it's comdat. I->deleteBody(); + I->setComdat(nullptr); + } } // Visit the Aliases. diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index fcacec3286fa..50f56b0f2afe 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -46,6 +46,7 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeStripDeadDebugInfoPass(Registry); initializeStripNonDebugSymbolsPass(Registry); initializeBarrierNoopPass(Registry); + initializeEliminateAvailableExternallyPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 963f1bb13aaf..88e5e479136f 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -105,6 +105,7 @@ PassManagerBuilder::PassManagerBuilder() { VerifyInput = false; VerifyOutput = false; MergeFunctions = false; + PrepareForLTO = false; } PassManagerBuilder::~PassManagerBuilder() { @@ -319,8 +320,8 @@ void PassManagerBuilder::populateModulePassManager( // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies - // on the rotated form. - MPM.add(createLoopRotatePass()); + // on the rotated form. Disable header duplication at -Oz. + MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. @@ -401,6 +402,17 @@ void PassManagerBuilder::populateModulePassManager( // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. if (OptLevel > 1) { + if (!PrepareForLTO) { + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. Note if they + // are unreferenced they will be removed by GlobalDCE below, so + // this only impacts referenced available externally globals. 
+ // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead. + MPM.add(createEliminateAvailableExternallyPass()); + } MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. MPM.add(createConstantMergePass()); // Merge dup global constants } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 010b7b57c3e7..0bd6fd2f226d 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3928,8 +3928,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Value *V = - SimplifyFCmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC, &I)) + if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, + I.getFastMathFlags(), DL, TLI, DT, AC, &I)) return ReplaceInstUsesWith(I, V); // Simplify 'fcmp pred X, X' diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 97ea8df757f8..ac934f1bd85c 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H #define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" @@ -177,6 +178,8 @@ private: // Mode in which we are running the combiner. const bool MinimizeSize; + AliasAnalysis *AA; + // Required analyses. // FIXME: These can never be null and should be references. AssumptionCache *AC; @@ -192,10 +195,11 @@ private: public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, - bool MinimizeSize, AssumptionCache *AC, TargetLibraryInfo *TLI, + bool MinimizeSize, AliasAnalysis *AA, + AssumptionCache *AC, TargetLibraryInfo *TLI, DominatorTree *DT, const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), - AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {} + AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {} /// \brief Run the combiner over the entire worklist until it is empty. /// diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index e7a45330d955..e3179dbeece8 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -749,10 +749,25 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // where there are several consecutive memory accesses to the same location, // separated by a few arithmetic operations. 
BasicBlock::iterator BBI = &LI; - if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) + AAMDNodes AATags; + if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, + 6, AA, &AATags)) { + if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) { + unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, + LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, + LLVMContext::MD_range, + LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, + }; + combineMetadata(NLI, &LI, KnownIDs); + }; + return ReplaceInstUsesWith( LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(), LI.getName() + ".cast")); + } // load(gep null, ...) -> unreachable if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 24446c8578e0..273047279e90 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -14,6 +14,8 @@ #include "InstCombineInternal.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -60,56 +62,6 @@ static bool CheapToScalarize(Value *V, bool isConstant) { return false; } -/// FindScalarElement - Given a vector and an element number, see if the scalar -/// value is already around as a register, for example if it were inserted then -/// extracted from the vector. -static Value *FindScalarElement(Value *V, unsigned EltNo) { - assert(V->getType()->isVectorTy() && "Not looking at a vector?"); - VectorType *VTy = cast<VectorType>(V->getType()); - unsigned Width = VTy->getNumElements(); - if (EltNo >= Width) // Out of range access. - return UndefValue::get(VTy->getElementType()); - - if (Constant *C = dyn_cast<Constant>(V)) - return C->getAggregateElement(EltNo); - - if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) { - // If this is an insert to a variable element, we don't know what it is. - if (!isa<ConstantInt>(III->getOperand(2))) - return nullptr; - unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); - - // If this is an insert to the element we are looking for, return the - // inserted value. - if (EltNo == IIElt) - return III->getOperand(1); - - // Otherwise, the insertelement doesn't modify the value, recurse on its - // vector input. - return FindScalarElement(III->getOperand(0), EltNo); - } - - if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) { - unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements(); - int InEl = SVI->getMaskValue(EltNo); - if (InEl < 0) - return UndefValue::get(VTy->getElementType()); - if (InEl < (int)LHSWidth) - return FindScalarElement(SVI->getOperand(0), InEl); - return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth); - } - - // Extract a value from a vector add operation with a constant zero. - Value *Val = nullptr; Constant *Con = nullptr; - if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) { - if (Con->getAggregateElement(EltNo)->isNullValue()) - return FindScalarElement(Val, EltNo); - } - - // Otherwise, we don't know. - return nullptr; -} - // If we have a PHI node with a vector type that has only 2 uses: feed // itself and be an operand of extractelement at a constant location, // try to replace the PHI of the vector type with a PHI of a scalar type. 
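The static FindScalarElement removed above has not disappeared: it now lives in llvm/Analysis/VectorUtils.h as findScalarElement, and the constant-vector and out-of-range cases it used to cover are handled up front by SimplifyExtractElementInst (used in the next hunk). A rough sketch of how another client could lean on the shared helpers; the wrapper below is hypothetical and assumes the defaulted analysis arguments of the Simplify* entry points:

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Constants.h"
using namespace llvm;

// Hypothetical helper, not part of this patch: return a scalar equivalent of
// `extractelement Vec, Idx`, or null if none is readily available.
static Value *getScalarFor(Value *Vec, Value *Idx, const DataLayout &DL) {
  // Folds constants, undef and out-of-range indices.
  if (Value *V = SimplifyExtractElementInst(Vec, Idx, DL))
    return V;
  // Otherwise look for a scalar that already exists, e.g. the value fed into
  // an insertelement at the same constant index.
  if (auto *CIdx = dyn_cast<ConstantInt>(Idx))
    return findScalarElement(Vec, CIdx->getZExtValue());
  return nullptr;
}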
@@ -178,6 +130,10 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { } Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { + if (Value *V = SimplifyExtractElementInst( + EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC)) + return ReplaceInstUsesWith(EI, V); + // If vector val is constant with all elements the same, replace EI with // that element. We handle a known element # below. if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) @@ -190,10 +146,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { unsigned IndexVal = IdxC->getZExtValue(); unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); - // If this is extracting an invalid index, turn this into undef, to avoid - // crashing the code below. - if (IndexVal >= VectorWidth) - return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); + // InstSimplify handles cases where the index is invalid. + assert(IndexVal < VectorWidth); // This instruction only demands the single element from the input vector. // If the input vector has a single use, simplify it based on this use @@ -209,16 +163,13 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } } - if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal)) - return ReplaceInstUsesWith(EI, Elt); - // If the this extractelement is directly using a bitcast from a vector of // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) if (VT->getNumElements() == VectorWidth) - if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal)) + if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal)) return new BitCastInst(Elt, EI.getType()); } diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 2a81689f7449..fd34a244f271 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2174,16 +2174,9 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { if (!EV.hasIndices()) return ReplaceInstUsesWith(EV, Agg); - if (Constant *C = dyn_cast<Constant>(Agg)) { - if (Constant *C2 = C->getAggregateElement(*EV.idx_begin())) { - if (EV.getNumIndices() == 0) - return ReplaceInstUsesWith(EV, C2); - // Extract the remaining indices out of the constant indexed by the - // first index - return ExtractValueInst::Create(C2, EV.getIndices().slice(1)); - } - return nullptr; // Can't handle other constants - } + if (Value *V = + SimplifyExtractValueInst(Agg, EV.getIndices(), DL, TLI, DT, AC)) + return ReplaceInstUsesWith(EV, V); if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { // We're extracting from an insertvalue instruction, compare the indices @@ -2972,8 +2965,9 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, static bool combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, - AssumptionCache &AC, TargetLibraryInfo &TLI, - DominatorTree &DT, LoopInfo *LI = nullptr) { + AliasAnalysis *AA, AssumptionCache &AC, + TargetLibraryInfo &TLI, DominatorTree &DT, + LoopInfo *LI = nullptr) { // Minimizing size? 
bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize); auto &DL = F.getParent()->getDataLayout(); @@ -2998,7 +2992,8 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist)) Changed = true; - InstCombiner IC(Worklist, &Builder, MinimizeSize, &AC, &TLI, &DT, DL, LI); + InstCombiner IC(Worklist, &Builder, MinimizeSize, + AA, &AC, &TLI, &DT, DL, LI); if (IC.run()) Changed = true; @@ -3017,7 +3012,8 @@ PreservedAnalyses InstCombinePass::run(Function &F, auto *LI = AM->getCachedResult<LoopAnalysis>(F); - if (!combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI)) + // FIXME: The AliasAnalysis is not yet supported in the new pass manager + if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3050,6 +3046,7 @@ public: void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired<AliasAnalysis>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); @@ -3061,6 +3058,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { return false; // Required analyses. + auto AA = &getAnalysis<AliasAnalysis>(); auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); @@ -3069,7 +3067,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - return combineInstructionsOverFunction(F, Worklist, AC, TLI, DT, LI); + return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, LI); } char InstructionCombiningPass::ID = 0; @@ -3078,6 +3076,7 @@ INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 60903c8b4aaf..d1eba6e70e57 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -656,11 +656,14 @@ namespace { LeaderTableEntry* Prev = nullptr; LeaderTableEntry* Curr = &LeaderTable[N]; - while (Curr->Val != I || Curr->BB != BB) { + while (Curr && (Curr->Val != I || Curr->BB != BB)) { Prev = Curr; Curr = Curr->Next; } + if (!Curr) + return; + if (Prev) { Prev->Next = Curr->Next; } else { @@ -1304,11 +1307,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (V->getType()->getScalarType()->isPointerTy()) { AliasAnalysis *AA = gvn.getAliasAnalysis(); - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) - AA->copyValue(LI, NewPHIs[i]); - - // Now that we've copied information to the new PHIs, scan through - // them again and inform alias analysis that we've added potentially + // Scan the new PHIs and inform alias analysis that we've added potentially // escaping uses to any values that are operands to these PHIs. 
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { PHINode *P = NewPHIs[i]; @@ -1796,7 +1795,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { // In general, GVN unifies expressions over different control-flow // regions, and so we need a conservative combination of the noalias // scopes. - unsigned KnownIDs[] = { + static const unsigned KnownIDs[] = { LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, LLVMContext::MD_noalias, diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 6f0375487af6..2a954d9961f2 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -135,6 +136,10 @@ namespace { PHINode *IndVar, SCEVExpander &Rewriter); void SinkUnusedInvariants(Loop *L); + + Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, + Instruction *InsertPt, Type *Ty, + bool &IsHighCostExpansion); }; } @@ -496,6 +501,52 @@ struct RewritePhi { }; } +Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, + Loop *L, Instruction *InsertPt, + Type *ResultTy, + bool &IsHighCostExpansion) { + using namespace llvm::PatternMatch; + + if (!Rewriter.isHighCostExpansion(S, L)) { + IsHighCostExpansion = false; + return Rewriter.expandCodeFor(S, ResultTy, InsertPt); + } + + // Before expanding S into an expensive LLVM expression, see if we can use an + // already existing value as the expansion for S. There is potential to make + // this significantly smarter, but this simple heuristic already gets some + // interesting cases. + + SmallVector<BasicBlock *, 4> Latches; + L->getLoopLatches(Latches); + + for (BasicBlock *BB : Latches) { + ICmpInst::Predicate Pred; + Instruction *LHS, *RHS; + BasicBlock *TrueBB, *FalseBB; + + if (!match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), + TrueBB, FalseBB))) + continue; + + if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) { + IsHighCostExpansion = false; + return LHS; + } + + if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) { + IsHighCostExpansion = false; + return RHS; + } + } + + // We didn't find anything, fall back to using SCEVExpander. + assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!"); + IsHighCostExpansion = true; + return Rewriter.expandCodeFor(S, ResultTy, InsertPt); +} + //===----------------------------------------------------------------------===// // RewriteLoopExitValues - Optimize IV users outside the loop. // As a side effect, reduces the amount of IV processing within the loop. @@ -628,7 +679,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { continue; } - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); + bool HighCost = false; + Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, + PN->getType(), HighCost); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); @@ -637,7 +690,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { DeadInsts.push_back(ExitVal); continue; } - bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L); // Collect all the candidate PHINodes to be rewritten. 
RewritePhiSet.push_back( diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index f0e6d641b180..43fc50e588f8 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -602,7 +602,8 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. while (!I.use_empty()) { - Instruction *User = I.user_back(); + Value::user_iterator UI = I.user_begin(); + auto *User = cast<Instruction>(*UI); if (!DT->isReachableFromEntry(User->getParent())) { User->replaceUsesOfWith(&I, UndefValue::get(I.getType())); continue; @@ -610,6 +611,16 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, // The user must be a PHI node. PHINode *PN = cast<PHINode>(User); + // Surprisingly, instructions can be used outside of loops without any + // exits. This can only happen in PHI nodes if the incoming block is + // unreachable. + Use &U = UI.getUse(); + BasicBlock *BB = PN->getIncomingBlock(U); + if (!DT->isReachableFromEntry(BB)) { + U = UndefValue::get(I.getType()); + continue; + } + BasicBlock *ExitBlock = PN->getParent(); assert(ExitBlockSet.count(ExitBlock) && "The LCSSA PHI is not in an exit block!"); diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp index 0325d268c325..1b9859b57790 100644 --- a/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/lib/Transforms/Scalar/LoopDistribute.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" #include <list> #define LDIST_NAME "loop-distribute" @@ -55,70 +56,6 @@ static cl::opt<bool> DistributeNonIfConvertible( STATISTIC(NumLoopsDistributed, "Number of loops distributed"); -/// \brief Remaps instructions in a loop including the preheader. -static void remapInstructionsInLoop(const SmallVectorImpl<BasicBlock *> &Blocks, - ValueToValueMapTy &VMap) { - // Rewrite the code to refer to itself. - for (auto *BB : Blocks) - for (auto &Inst : *BB) - RemapInstruction(&Inst, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); -} - -/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p -/// Blocks. -/// -/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block -/// \p LoopDomBB. Insert the new blocks before block specified in \p Before. -static Loop *cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, - Loop *OrigLoop, ValueToValueMapTy &VMap, - const Twine &NameSuffix, LoopInfo *LI, - DominatorTree *DT, - SmallVectorImpl<BasicBlock *> &Blocks) { - Function *F = OrigLoop->getHeader()->getParent(); - Loop *ParentLoop = OrigLoop->getParentLoop(); - - Loop *NewLoop = new Loop(); - if (ParentLoop) - ParentLoop->addChildLoop(NewLoop); - else - LI->addTopLevelLoop(NewLoop); - - BasicBlock *OrigPH = OrigLoop->getLoopPreheader(); - BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F); - // To rename the loop PHIs. - VMap[OrigPH] = NewPH; - Blocks.push_back(NewPH); - - // Update LoopInfo. - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewPH, *LI); - - // Update DominatorTree. - DT->addNewBlock(NewPH, LoopDomBB); - - for (BasicBlock *BB : OrigLoop->getBlocks()) { - BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F); - VMap[BB] = NewBB; - - // Update LoopInfo. 
- NewLoop->addBasicBlockToLoop(NewBB, *LI); - - // Update DominatorTree. - BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); - DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); - - Blocks.push_back(NewBB); - } - - // Move them physically from the end of the block list. - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH); - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), - NewLoop->getHeader(), F->end()); - - return NewLoop; -} - namespace { /// \brief Maintains the set of instructions of the loop for a partition before /// cloning. After cloning, it hosts the new loop. @@ -204,7 +141,9 @@ public: ValueToValueMapTy &getVMap() { return VMap; } /// \brief Remaps the cloned instructions using VMap. - void remapInstructions() { remapInstructionsInLoop(ClonedLoopBlocks, VMap); } + void remapInstructions() { + remapInstructionsInBlocks(ClonedLoopBlocks, VMap); + } /// \brief Based on the set of instructions selected for this partition, /// removes the unnecessary ones. @@ -493,15 +432,14 @@ public: /// partitions its entry is set to -1. SmallVector<int, 8> computePartitionSetForPointers(const LoopAccessInfo &LAI) { - const LoopAccessInfo::RuntimePointerCheck *RtPtrCheck = - LAI.getRuntimePointerCheck(); + const RuntimePointerChecking *RtPtrCheck = LAI.getRuntimePointerChecking(); unsigned N = RtPtrCheck->Pointers.size(); SmallVector<int, 8> PtrToPartitions(N); for (unsigned I = 0; I < N; ++I) { - Value *Ptr = RtPtrCheck->Pointers[I]; + Value *Ptr = RtPtrCheck->Pointers[I].PointerValue; auto Instructions = - LAI.getInstructionsForAccess(Ptr, RtPtrCheck->IsWritePtr[I]); + LAI.getInstructionsForAccess(Ptr, RtPtrCheck->Pointers[I].IsWritePtr); int &Partition = PtrToPartitions[I]; // First set it to uninitialized. @@ -629,121 +567,6 @@ private: AccessesType Accesses; }; -/// \brief Handles the loop versioning based on memchecks. -class LoopVersioning { -public: - LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT, - const SmallVector<int, 8> *PtrToPartition = nullptr) - : VersionedLoop(L), NonVersionedLoop(nullptr), - PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) {} - - /// \brief Returns true if we need memchecks to disambiguate may-aliasing - /// accesses. - bool needsRuntimeChecks() const { - return LAI.getRuntimePointerCheck()->needsAnyChecking(PtrToPartition); - } - - /// \brief Performs the CFG manipulation part of versioning the loop including - /// the DominatorTree and LoopInfo updates. - void versionLoop(Pass *P) { - Instruction *FirstCheckInst; - Instruction *MemRuntimeCheck; - // Add the memcheck in the original preheader (this is empty initially). - BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader(); - std::tie(FirstCheckInst, MemRuntimeCheck) = - LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition); - assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); - - // Rename the block to make the IR more readable. - MemCheckBB->setName(VersionedLoop->getHeader()->getName() + - ".lver.memcheck"); - - // Create empty preheader for the loop (and after cloning for the - // non-versioned loop). - BasicBlock *PH = - SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI); - PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); - - // Clone the loop including the preheader. - // - // FIXME: This does not currently preserve SimplifyLoop because the exit - // block is a join between the two loops. 
- SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks; - NonVersionedLoop = - cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, - ".lver.orig", LI, DT, NonVersionedLoopBlocks); - remapInstructionsInLoop(NonVersionedLoopBlocks, VMap); - - // Insert the conditional branch based on the result of the memchecks. - Instruction *OrigTerm = MemCheckBB->getTerminator(); - BranchInst::Create(NonVersionedLoop->getLoopPreheader(), - VersionedLoop->getLoopPreheader(), MemRuntimeCheck, - OrigTerm); - OrigTerm->eraseFromParent(); - - // The loops merge in the original exit block. This is now dominated by the - // memchecking block. - DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB); - } - - /// \brief Adds the necessary PHI nodes for the versioned loops based on the - /// loop-defined values used outside of the loop. - void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside) { - BasicBlock *PHIBlock = VersionedLoop->getExitBlock(); - assert(PHIBlock && "No single successor to loop exit block"); - - for (auto *Inst : DefsUsedOutside) { - auto *NonVersionedLoopInst = cast<Instruction>(VMap[Inst]); - PHINode *PN; - - // First see if we have a single-operand PHI with the value defined by the - // original loop. - for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) { - assert(PN->getNumOperands() == 1 && - "Exit block should only have on predecessor"); - if (PN->getIncomingValue(0) == Inst) - break; - } - // If not create it. - if (!PN) { - PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", - PHIBlock->begin()); - for (auto *User : Inst->users()) - if (!VersionedLoop->contains(cast<Instruction>(User)->getParent())) - User->replaceUsesOfWith(Inst, PN); - PN->addIncoming(Inst, VersionedLoop->getExitingBlock()); - } - // Add the new incoming value from the non-versioned loop. - PN->addIncoming(NonVersionedLoopInst, - NonVersionedLoop->getExitingBlock()); - } - } - -private: - /// \brief The original loop. This becomes the "versioned" one, i.e. control - /// goes if the memchecks all pass. - Loop *VersionedLoop; - /// \brief The fall-back loop, i.e. if any of the memchecks fail. - Loop *NonVersionedLoop; - - /// \brief For each memory pointer it contains the partitionId it is used in. - /// If nullptr, no partitioning is used. - /// - /// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck(). - /// If the pointer is used in multiple partitions the entry is set to -1. - const SmallVector<int, 8> *PtrToPartition; - - /// \brief This maps the instructions from VersionedLoop to their counterpart - /// in NonVersionedLoop. - ValueToValueMapTy VMap; - - /// \brief Analyses used. - const LoopAccessInfo &LAI; - LoopInfo *LI; - DominatorTree *DT; -}; - /// \brief Returns the instructions that use values defined in the loop. 
static SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L) { SmallVector<Instruction *, 8> UsedOutside; @@ -929,7 +752,7 @@ private: LoopVersioning LVer(LAI, L, LI, DT, &PtrToPartition); if (LVer.needsRuntimeChecks()) { DEBUG(dbgs() << "\nPointers:\n"); - DEBUG(LAI.getRuntimePointerCheck()->print(dbgs(), 0, &PtrToPartition)); + DEBUG(LAI.getRuntimePointerChecking()->print(dbgs(), 0, &PtrToPartition)); LVer.versionLoop(this); LVer.addPHINodes(DefsUsedOutside); } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 714ce914a8b3..a21ca2417ca1 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -508,7 +508,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst, ICmpInst *NewPreCond = cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1)); - PreCond->replaceAllUsesWith(NewPreCond); + PreCondBr->setCondition(NewPreCond); RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI); } diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 25546553fd4d..9d7e57ffebac 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -282,21 +282,21 @@ static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) { DEBUG(dbgs() << "Calling populateWorklist called\n"); LoopVector LoopList; Loop *CurrentLoop = &L; - std::vector<Loop *> vec = CurrentLoop->getSubLoopsVector(); - while (vec.size() != 0) { + const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops(); + while (!Vec->empty()) { // The current loop has multiple subloops in it hence it is not tightly // nested. // Discard all loops above it added into Worklist. - if (vec.size() != 1) { + if (Vec->size() != 1) { LoopList.clear(); return; } LoopList.push_back(CurrentLoop); - CurrentLoop = *(vec.begin()); - vec = CurrentLoop->getSubLoopsVector(); + CurrentLoop = Vec->front(); + Vec = &CurrentLoop->getSubLoops(); } LoopList.push_back(CurrentLoop); - V.push_back(LoopList); + V.push_back(std::move(LoopList)); } static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 9e7558d9c45f..d78db6c369b3 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -840,8 +840,10 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; - bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime; - if (HasRuntimeUnrollDisablePragma(L)) { + bool AllowRuntime = + (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime); + // Don't unroll a runtime trip count loop with unroll full pragma. + if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { AllowRuntime = false; } if (Unrolling == Partial) { diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index 243db8d70ca2..643f3740eedd 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -301,10 +301,6 @@ void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB, // Merged instruction Instruction *HoistedInst = HoistCand->clone(); - // Notify AA of the new value. - if (isa<LoadInst>(HoistCand)) - AA->copyValue(HoistCand, HoistedInst); - // Hoist instruction. 
HoistedInst->insertBefore(HoistPt); @@ -451,9 +447,6 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, NewPN->addIncoming(Opd1, S0->getParent()); NewPN->addIncoming(Opd2, S1->getParent()); if (NewPN->getType()->getScalarType()->isPointerTy()) { - // Notify AA of the new value. - AA->copyValue(Opd1, NewPN); - AA->copyValue(Opd2, NewPN); // AA needs to be informed when a PHI-use of the pointer value is added for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) { unsigned J = PHINode::getOperandNumForIncomingValue(I); @@ -491,7 +484,6 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0, // Create the new store to be inserted at the join point. StoreInst *SNew = (StoreInst *)(S0->clone()); Instruction *ANew = A0->clone(); - AA->copyValue(S0, SNew); SNew->insertBefore(InsertPt); ANew->insertBefore(SNew); diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp index 9ecaf102574a..366301ad731a 100644 --- a/lib/Transforms/Scalar/PlaceSafepoints.cpp +++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp @@ -399,8 +399,8 @@ static bool doesNotRequireEntrySafepointBefore(const CallSite &CS) { // at least if they do, are leaf functions that cause only finite stack // growth. In particular, the optimizer likes to form things like memsets // out of stores in the original IR. Another important example is - // llvm.frameescape which must occur in the entry block. Inserting a - // safepoint before it is not legal since it could push the frameescape + // llvm.localescape which must occur in the entry block. Inserting a + // safepoint before it is not legal since it could push the localescape // out of the entry block. return true; } diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 305175ff8f73..4d3a708fa20e 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1799,11 +1799,10 @@ bool IPSCCP::runOnModule(Module &M) { if (!TI->use_empty()) TI->replaceAllUsesWith(UndefValue::get(TI->getType())); TI->eraseFromParent(); + new UnreachableInst(M.getContext(), BB); if (&*BB != &F->front()) BlocksToErase.push_back(BB); - else - new UnreachableInst(M.getContext(), BB); continue; } diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 056dd11b5ab3..d1a0a82b9b08 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -2593,13 +2593,21 @@ private: V = rewriteIntegerLoad(LI); } else if (NewBeginOffset == NewAllocaBeginOffset && canConvertValue(DL, NewAllocaTy, LI.getType())) { - V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(), - LI.getName()); + LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + LI.isVolatile(), LI.getName()); + if (LI.isVolatile()) + NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + + V = NewLI; } else { Type *LTy = TargetTy->getPointerTo(); - V = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy), - getSliceAlign(TargetTy), LI.isVolatile(), - LI.getName()); + LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy), + getSliceAlign(TargetTy), + LI.isVolatile(), LI.getName()); + if (LI.isVolatile()) + NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + + V = NewLI; IsPtrAdjusted = true; } V = convertValue(DL, IRB, V, TargetTy); @@ -2722,7 +2730,8 @@ private: NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()), SI.isVolatile()); } - (void)NewSI; + if (SI.isVolatile()) + 
NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope()); Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 53471de6154c..ef7dacac79cb 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -440,8 +440,6 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); - if (AA) - AA->copyValue(PN, NewPHI); // NOTE! This loop walks backwards for a reason! First off, this minimizes // the cost of removal if we end up removing a large number of values, and diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 470e2d09132e..716e655affb9 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_library(LLVMTransformUtils LoopUnroll.cpp LoopUnrollRuntime.cpp LoopUtils.cpp + LoopVersioning.cpp LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 4f8d1dfbe5df..cc4d6c6fb192 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" @@ -720,3 +721,68 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ModuleLevelChanges, Returns, NameSuffix, CodeInfo, nullptr); } + +/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap. +void llvm::remapInstructionsInBlocks( + const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) { + // Rewrite the code to refer to itself. + for (auto *BB : Blocks) + for (auto &Inst : *BB) + RemapInstruction(&Inst, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); +} + +/// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p +/// Blocks. +/// +/// Updates LoopInfo and DominatorTree assuming the loop is dominated by block +/// \p LoopDomBB. Insert the new blocks before block specified in \p Before. +Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, + Loop *OrigLoop, ValueToValueMapTy &VMap, + const Twine &NameSuffix, LoopInfo *LI, + DominatorTree *DT, + SmallVectorImpl<BasicBlock *> &Blocks) { + Function *F = OrigLoop->getHeader()->getParent(); + Loop *ParentLoop = OrigLoop->getParentLoop(); + + Loop *NewLoop = new Loop(); + if (ParentLoop) + ParentLoop->addChildLoop(NewLoop); + else + LI->addTopLevelLoop(NewLoop); + + BasicBlock *OrigPH = OrigLoop->getLoopPreheader(); + assert(OrigPH && "No preheader"); + BasicBlock *NewPH = CloneBasicBlock(OrigPH, VMap, NameSuffix, F); + // To rename the loop PHIs. + VMap[OrigPH] = NewPH; + Blocks.push_back(NewPH); + + // Update LoopInfo. + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewPH, *LI); + + // Update DominatorTree. + DT->addNewBlock(NewPH, LoopDomBB); + + for (BasicBlock *BB : OrigLoop->getBlocks()) { + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F); + VMap[BB] = NewBB; + + // Update LoopInfo. + NewLoop->addBasicBlockToLoop(NewBB, *LI); + + // Update DominatorTree. 
+ BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); + DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB])); + + Blocks.push_back(NewBB); + } + + // Move them physically from the end of the block list. + F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH); + F->getBasicBlockList().splice(Before, F->getBasicBlockList(), + NewLoop->getHeader(), F->end()); + + return NewLoop; +} diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 56085579b61c..50ca6234d0b7 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -900,13 +900,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, if (auto *GO = dyn_cast<GlobalObject>(V)) { // If there is a large requested alignment and we can, bump up the alignment - // of the global. - if (GO->isDeclaration()) - return Align; - // If the memory we set aside for the global may not be the memory used by - // the final program then it is impossible for us to reliably enforce the - // preferred alignment. - if (GO->isWeakForLinker()) + // of the global. If the memory we set aside for the global may not be the + // memory used by the final program then it is impossible for us to reliably + // enforce the preferred alignment. + if (!GO->isStrongDefinitionForLinker()) return Align; if (GO->getAlignment() >= PrefAlign) diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 2e7d21cb171f..5c98043e4632 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -403,7 +403,6 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, PHINode *PN = cast<PHINode>(I); PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), PN->getName()+".be", BETerminator); - if (AA) AA->copyValue(PN, NewPN); // Loop over the PHI node, moving all entries except the one for the // preheader over to the new PHI node. diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp new file mode 100644 index 000000000000..832079d2cf63 --- /dev/null +++ b/lib/Transforms/Utils/LoopVersioning.cpp @@ -0,0 +1,106 @@ +//===- LoopVersioning.cpp - Utility to version a loop ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a utility class to perform loop versioning. The versioned +// loop speculates that otherwise may-aliasing memory accesses don't overlap and +// emits checks to prove this. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" + +using namespace llvm; + +LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, + DominatorTree *DT, + const SmallVector<int, 8> *PtrToPartition) + : VersionedLoop(L), NonVersionedLoop(nullptr), + PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) { + assert(L->getExitBlock() && "No single exit block"); + assert(L->getLoopPreheader() && "No preheader"); +} + +bool LoopVersioning::needsRuntimeChecks() const { + return LAI.getRuntimePointerChecking()->needsAnyChecking(PtrToPartition); +} + +void LoopVersioning::versionLoop(Pass *P) { + Instruction *FirstCheckInst; + Instruction *MemRuntimeCheck; + // Add the memcheck in the original preheader (this is empty initially). + BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader(); + std::tie(FirstCheckInst, MemRuntimeCheck) = + LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition); + assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); + + // Rename the block to make the IR more readable. + MemCheckBB->setName(VersionedLoop->getHeader()->getName() + ".lver.memcheck"); + + // Create empty preheader for the loop (and after cloning for the + // non-versioned loop). + BasicBlock *PH = SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI); + PH->setName(VersionedLoop->getHeader()->getName() + ".ph"); + + // Clone the loop including the preheader. + // + // FIXME: This does not currently preserve SimplifyLoop because the exit + // block is a join between the two loops. + SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks; + NonVersionedLoop = + cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, ".lver.orig", + LI, DT, NonVersionedLoopBlocks); + remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap); + + // Insert the conditional branch based on the result of the memchecks. + Instruction *OrigTerm = MemCheckBB->getTerminator(); + BranchInst::Create(NonVersionedLoop->getLoopPreheader(), + VersionedLoop->getLoopPreheader(), MemRuntimeCheck, + OrigTerm); + OrigTerm->eraseFromParent(); + + // The loops merge in the original exit block. This is now dominated by the + // memchecking block. + DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB); +} + +void LoopVersioning::addPHINodes( + const SmallVectorImpl<Instruction *> &DefsUsedOutside) { + BasicBlock *PHIBlock = VersionedLoop->getExitBlock(); + assert(PHIBlock && "No single successor to loop exit block"); + + for (auto *Inst : DefsUsedOutside) { + auto *NonVersionedLoopInst = cast<Instruction>(VMap[Inst]); + PHINode *PN; + + // First see if we have a single-operand PHI with the value defined by the + // original loop. + for (auto I = PHIBlock->begin(); (PN = dyn_cast<PHINode>(I)); ++I) { + assert(PN->getNumOperands() == 1 && + "Exit block should only have on predecessor"); + if (PN->getIncomingValue(0) == Inst) + break; + } + // If not create it. 
+ if (!PN) { + PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver", + PHIBlock->begin()); + for (auto *User : Inst->users()) + if (!VersionedLoop->contains(cast<Instruction>(User)->getParent())) + User->replaceUsesOfWith(Inst, PN); + PN->addIncoming(Inst, VersionedLoop->getExitingBlock()); + } + // Add the new incoming value from the non-versioned loop. + PN->addIncoming(NonVersionedLoopInst, NonVersionedLoop->getExitingBlock()); + } +} diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 5ba14174ac79..69ca2688c810 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -148,8 +148,9 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor( cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8)); -/// We don't unroll loops with a known constant trip count below this number. -static const unsigned TinyTripCountUnrollThreshold = 128; +/// We don't interleave loops with a known constant trip count below this +/// number. +static const unsigned TinyTripCountInterleaveThreshold = 128; static cl::opt<unsigned> ForceTargetNumScalarRegs( "force-target-num-scalar-regs", cl::init(0), cl::Hidden, @@ -180,7 +181,8 @@ static cl::opt<unsigned> ForceTargetInstructionCost( static cl::opt<unsigned> SmallLoopCost( "small-loop-cost", cl::init(20), cl::Hidden, - cl::desc("The cost of a loop that is considered 'small' by the unroller.")); + cl::desc( + "The cost of a loop that is considered 'small' by the interleaver.")); static cl::opt<bool> LoopVectorizeWithBlockFrequency( "loop-vectorize-with-block-frequency", cl::init(false), cl::Hidden, @@ -188,10 +190,11 @@ static cl::opt<bool> LoopVectorizeWithBlockFrequency( "heuristics minimizing code growth in cold regions and being more " "aggressive in hot regions.")); -// Runtime unroll loops for load/store throughput. -static cl::opt<bool> EnableLoadStoreRuntimeUnroll( - "enable-loadstore-runtime-unroll", cl::init(true), cl::Hidden, - cl::desc("Enable runtime unrolling until load/store ports are saturated")); +// Runtime interleave loops for load/store throughput. +static cl::opt<bool> EnableLoadStoreRuntimeInterleave( + "enable-loadstore-runtime-interleave", cl::init(true), cl::Hidden, + cl::desc( + "Enable runtime interleaving until load/store ports are saturated")); /// The number of stores in a loop that are allowed to need predication. 
static cl::opt<unsigned> NumberOfStoresToPredicate( @@ -200,15 +203,15 @@ static cl::opt<unsigned> NumberOfStoresToPredicate( static cl::opt<bool> EnableIndVarRegisterHeur( "enable-ind-var-reg-heur", cl::init(true), cl::Hidden, - cl::desc("Count the induction variable only once when unrolling")); + cl::desc("Count the induction variable only once when interleaving")); static cl::opt<bool> EnableCondStoresVectorization( "enable-cond-stores-vec", cl::init(false), cl::Hidden, cl::desc("Enable if predication of stores during vectorization.")); -static cl::opt<unsigned> MaxNestedScalarReductionUF( - "max-nested-scalar-reduction-unroll", cl::init(2), cl::Hidden, - cl::desc("The maximum unroll factor to use when unrolling a scalar " +static cl::opt<unsigned> MaxNestedScalarReductionIC( + "max-nested-scalar-reduction-interleave", cl::init(2), cl::Hidden, + cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")); namespace { @@ -921,8 +924,8 @@ public: bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); } /// Returns the information that we collected about runtime memory check. - const LoopAccessInfo::RuntimePointerCheck *getRuntimePointerCheck() const { - return LAI->getRuntimePointerCheck(); + const RuntimePointerChecking *getRuntimePointerChecking() const { + return LAI->getRuntimePointerChecking(); } const LoopAccessInfo *getLAI() const { @@ -1105,12 +1108,19 @@ public: /// 64 bit loop indices. unsigned getWidestType(); + /// \return The desired interleave count. + /// If interleave count has been specified by metadata it will be returned. + /// Otherwise, the interleave count is computed and returned. VF and LoopCost + /// are the selected vectorization factor and the cost of the selected VF. + unsigned selectInterleaveCount(bool OptForSize, unsigned VF, + unsigned LoopCost); + /// \return The most profitable unroll factor. - /// If UserUF is non-zero then this method finds the best unroll-factor - /// based on register pressure and other parameters. - /// VF and LoopCost are the selected vectorization factor and the cost of the - /// selected VF. - unsigned selectUnrollFactor(bool OptForSize, unsigned VF, unsigned LoopCost); + /// This method finds the best unroll-factor based on register pressure and + /// other parameters. VF and LoopCost are the selected vectorization factor + /// and the cost of the selected VF. + unsigned computeInterleaveCount(bool OptForSize, unsigned VF, + unsigned LoopCost); /// \brief A struct that represents some properties of the register usage /// of a loop. @@ -1456,9 +1466,14 @@ struct LoopVectorize : public FunctionPass { const BranchProbability ColdProb(1, 5); // 20% ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb; - // If the target claims to have no vector registers don't attempt - // vectorization. - if (!TTI->getNumberOfRegisters(true)) + // Don't attempt if + // 1. the target claims to have no vector registers, and + // 2. interleaving won't help ILP. + // + // The second condition is necessary because, even if the target has no + // vector registers, loop vectorization may still enable scalar + // interleaving. + if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2) return false; // Build up a worklist of inner-loops to vectorize. 
This is necessary as @@ -1633,18 +1648,17 @@ struct LoopVectorize : public FunctionPass { const LoopVectorizationCostModel::VectorizationFactor VF = CM.selectVectorizationFactor(OptForSize); - // Select the unroll factor. - const unsigned UF = - CM.selectUnrollFactor(OptForSize, VF.Width, VF.Cost); + // Select the interleave count. + unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " << DebugLocStr << '\n'); - DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n'); + DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); if (VF.Width == 1) { DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n"); - if (UF == 1) { + if (IC == 1) { emitOptimizationRemarkAnalysis( F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), "not beneficial to vectorize and user disabled interleaving"); @@ -1654,17 +1668,14 @@ struct LoopVectorize : public FunctionPass { // Report the unrolling decision. emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - Twine("unrolled with interleaving factor " + - Twine(UF) + + Twine("interleaved by " + Twine(IC) + " (vectorization not beneficial)")); - // We decided not to vectorize, but we may want to unroll. - - InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, UF); + InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC); Unroller.vectorize(&LVL); } else { // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, UF); + InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC); LB.vectorize(&LVL); ++LoopsVectorized; @@ -1675,10 +1686,10 @@ struct LoopVectorize : public FunctionPass { AddRuntimeUnrollDisableMetaData(L); // Report the vectorization decision. - emitOptimizationRemark( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - Twine("vectorized loop (vectorization factor: ") + Twine(VF.Width) + - ", unrolling interleave factor: " + Twine(UF) + ")"); + emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + Twine("vectorized loop (vectorization width: ") + + Twine(VF.Width) + ", interleaved count: " + + Twine(IC) + ")"); } // Mark the loop as already vectorized to avoid vectorizing again. @@ -1760,31 +1771,6 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, return Builder.CreateAdd(Val, Step, "induction"); } -/// \brief Find the operand of the GEP that should be checked for consecutive -/// stores. This ignores trailing indices that have no effect on the final -/// pointer. -static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) { - const DataLayout &DL = Gep->getModule()->getDataLayout(); - unsigned LastOperand = Gep->getNumOperands() - 1; - unsigned GEPAllocSize = DL.getTypeAllocSize( - cast<PointerType>(Gep->getType()->getScalarType())->getElementType()); - - // Walk backwards and try to peel off zeros. - while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) { - // Find the type we're currently indexing into. - gep_type_iterator GEPTI = gep_type_begin(Gep); - std::advance(GEPTI, LastOperand - 1); - - // If it's a type with the same allocation size as the result of the GEP we - // can peel off the zero index. 
- if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize) - break; - --LastOperand; - } - - return LastOperand; -} - int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); // Make sure that the pointer does not point to structs. @@ -2503,9 +2489,9 @@ void InnerLoopVectorizer::createEmptyLoop() { */ BasicBlock *OldBasicBlock = OrigLoop->getHeader(); - BasicBlock *BypassBlock = OrigLoop->getLoopPreheader(); + BasicBlock *VectorPH = OrigLoop->getLoopPreheader(); BasicBlock *ExitBlock = OrigLoop->getExitBlock(); - assert(BypassBlock && "Invalid loop structure"); + assert(VectorPH && "Invalid loop structure"); assert(ExitBlock && "Must have an exit block"); // Some loops have a single integer induction variable, while other loops @@ -2545,44 +2531,35 @@ void InnerLoopVectorizer::createEmptyLoop() { // loop. Value *BackedgeCount = Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(), - BypassBlock->getTerminator()); + VectorPH->getTerminator()); if (BackedgeCount->getType()->isPointerTy()) BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy, "backedge.ptrcnt.to.int", - BypassBlock->getTerminator()); + VectorPH->getTerminator()); Instruction *CheckBCOverflow = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount, Constant::getAllOnesValue(BackedgeCount->getType()), - "backedge.overflow", BypassBlock->getTerminator()); + "backedge.overflow", VectorPH->getTerminator()); // The loop index does not have to start at Zero. Find the original start // value from the induction PHI node. If we don't have an induction variable // then we know that it starts at zero. - Builder.SetInsertPoint(BypassBlock->getTerminator()); - Value *StartIdx = ExtendedIdx = OldInduction ? - Builder.CreateZExt(OldInduction->getIncomingValueForBlock(BypassBlock), - IdxTy): - ConstantInt::get(IdxTy, 0); - - // We need an instruction to anchor the overflow check on. StartIdx needs to - // be defined before the overflow check branch. Because the scalar preheader - // is going to merge the start index and so the overflow branch block needs to - // contain a definition of the start index. - Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd( - StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor", - BypassBlock->getTerminator()); + Builder.SetInsertPoint(VectorPH->getTerminator()); + Value *StartIdx = ExtendedIdx = + OldInduction + ? Builder.CreateZExt(OldInduction->getIncomingValueForBlock(VectorPH), + IdxTy) + : ConstantInt::get(IdxTy, 0); // Count holds the overall loop count (N). Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), - BypassBlock->getTerminator()); + VectorPH->getTerminator()); - LoopBypassBlocks.push_back(BypassBlock); + LoopBypassBlocks.push_back(VectorPH); // Split the single block loop into the two loop structure described above. 
- BasicBlock *VectorPH = - BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph"); BasicBlock *VecBody = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block"); BasicBlock *ScalarPH = @@ -2597,7 +2574,6 @@ void InnerLoopVectorizer::createEmptyLoop() { if (ParentLoop) { ParentLoop->addChildLoop(Lp); ParentLoop->addBasicBlockToLoop(ScalarPH, *LI); - ParentLoop->addBasicBlockToLoop(VectorPH, *LI); ParentLoop->addBasicBlockToLoop(MiddleBlock, *LI); } else { LI->addTopLevelLoop(Lp); @@ -2615,9 +2591,20 @@ void InnerLoopVectorizer::createEmptyLoop() { // times the unroll factor (num of SIMD instructions). Constant *Step = ConstantInt::get(IdxTy, VF * UF); + // Generate code to check that the loop's trip count that we computed by + // adding one to the backedge-taken count will not overflow. + BasicBlock *NewVectorPH = + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked"); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); + ReplaceInstWithInst( + VectorPH->getTerminator(), + BranchInst::Create(ScalarPH, NewVectorPH, CheckBCOverflow)); + VectorPH = NewVectorPH; + // This is the IR builder that we use to add all of the logic for bypassing // the new vector loop. - IRBuilder<> BypassBuilder(BypassBlock->getTerminator()); + IRBuilder<> BypassBuilder(VectorPH->getTerminator()); setDebugLocFromInst(BypassBuilder, getDebugLocFromInstOrOperands(OldInduction)); @@ -2646,24 +2633,14 @@ void InnerLoopVectorizer::createEmptyLoop() { // jump to the scalar loop. Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero"); - - BasicBlock *LastBypassBlock = BypassBlock; - - // Generate code to check that the loops trip count that we computed by adding - // one to the backedge-taken count will not overflow. - { - auto PastOverflowCheck = - std::next(BasicBlock::iterator(OverflowCheckAnchor)); - BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); - LoopBypassBlocks.push_back(CheckBlock); - ReplaceInstWithInst( - LastBypassBlock->getTerminator(), - BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow)); - LastBypassBlock = CheckBlock; - } + NewVectorPH = + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); + if (ParentLoop) + ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); + LoopBypassBlocks.push_back(VectorPH); + ReplaceInstWithInst(VectorPH->getTerminator(), + BranchInst::Create(MiddleBlock, NewVectorPH, Cmp)); + VectorPH = NewVectorPH; // Generate the code to check that the strides we assumed to be one are really // one. We want the new basic block to start at the first instruction in a @@ -2671,23 +2648,24 @@ void InnerLoopVectorizer::createEmptyLoop() { Instruction *StrideCheck; Instruction *FirstCheckInst; std::tie(FirstCheckInst, StrideCheck) = - addStrideCheck(LastBypassBlock->getTerminator()); + addStrideCheck(VectorPH->getTerminator()); if (StrideCheck) { AddedSafetyChecks = true; // Create a new block containing the stride check. 
- BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck"); + VectorPH->setName("vector.stridecheck"); + NewVectorPH = + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); if (ParentLoop) - ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); - LoopBypassBlocks.push_back(CheckBlock); + ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); + LoopBypassBlocks.push_back(VectorPH); // Replace the branch into the memory check block with a conditional branch // for the "few elements case". - ReplaceInstWithInst(LastBypassBlock->getTerminator(), - BranchInst::Create(MiddleBlock, CheckBlock, Cmp)); + ReplaceInstWithInst( + VectorPH->getTerminator(), + BranchInst::Create(MiddleBlock, NewVectorPH, StrideCheck)); - Cmp = StrideCheck; - LastBypassBlock = CheckBlock; + VectorPH = NewVectorPH; } // Generate the code that checks in runtime if arrays overlap. We put the @@ -2695,28 +2673,26 @@ void InnerLoopVectorizer::createEmptyLoop() { // faster. Instruction *MemRuntimeCheck; std::tie(FirstCheckInst, MemRuntimeCheck) = - Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator()); + Legal->getLAI()->addRuntimeCheck(VectorPH->getTerminator()); if (MemRuntimeCheck) { AddedSafetyChecks = true; // Create a new block containing the memory check. - BasicBlock *CheckBlock = - LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck"); + VectorPH->setName("vector.memcheck"); + NewVectorPH = + VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); if (ParentLoop) - ParentLoop->addBasicBlockToLoop(CheckBlock, *LI); - LoopBypassBlocks.push_back(CheckBlock); + ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); + LoopBypassBlocks.push_back(VectorPH); // Replace the branch into the memory check block with a conditional branch // for the "few elements case". - ReplaceInstWithInst(LastBypassBlock->getTerminator(), - BranchInst::Create(MiddleBlock, CheckBlock, Cmp)); + ReplaceInstWithInst( + VectorPH->getTerminator(), + BranchInst::Create(MiddleBlock, NewVectorPH, MemRuntimeCheck)); - Cmp = MemRuntimeCheck; - LastBypassBlock = CheckBlock; + VectorPH = NewVectorPH; } - ReplaceInstWithInst(LastBypassBlock->getTerminator(), - BranchInst::Create(MiddleBlock, VectorPH, Cmp)); - // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the // PHIs that are left in the scalar version of the loop. @@ -3831,7 +3807,7 @@ bool LoopVectorizationLegality::canVectorize() { } // We can only vectorize innermost loops. - if (!TheLoop->getSubLoopsVector().empty()) { + if (!TheLoop->empty()) { emitAnalysis(VectorizationReport() << "loop is not the innermost loop"); return false; } @@ -3897,10 +3873,11 @@ bool LoopVectorizationLegality::canVectorize() { // Collect all of the variables that remain uniform after vectorization. collectLoopUniforms(); - DEBUG(dbgs() << "LV: We can vectorize this loop" << - (LAI->getRuntimePointerCheck()->Need ? " (with a runtime bound check)" : - "") - <<"!\n"); + DEBUG(dbgs() << "LV: We can vectorize this loop" + << (LAI->getRuntimePointerChecking()->Need + ? " (with a runtime bound check)" + : "") + << "!\n"); // Analyze interleaved memory accesses. if (EnableInterleavedMemAccesses) @@ -4130,118 +4107,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return true; } -///\brief Remove GEPs whose indices but the last one are loop invariant and -/// return the induction operand of the gep pointer. 
-static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { - GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); - if (!GEP) - return Ptr; - - unsigned InductionOperand = getGEPInductionOperand(GEP); - - // Check that all of the gep indices are uniform except for our induction - // operand. - for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) - if (i != InductionOperand && - !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp)) - return Ptr; - return GEP->getOperand(InductionOperand); -} - -///\brief Look for a cast use of the passed value. -static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { - Value *UniqueCast = nullptr; - for (User *U : Ptr->users()) { - CastInst *CI = dyn_cast<CastInst>(U); - if (CI && CI->getType() == Ty) { - if (!UniqueCast) - UniqueCast = CI; - else - return nullptr; - } - } - return UniqueCast; -} - -///\brief Get the stride of a pointer access in a loop. -/// Looks for symbolic strides "a[i*stride]". Returns the symbolic stride as a -/// pointer to the Value, or null otherwise. -static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { - const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); - if (!PtrTy || PtrTy->isAggregateType()) - return nullptr; - - // Try to remove a gep instruction to make the pointer (actually index at this - // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the - // pointer, otherwise, we are analyzing the index. - Value *OrigPtr = Ptr; - - // The size of the pointer access. - int64_t PtrAccessSize = 1; - - Ptr = stripGetElementPtr(Ptr, SE, Lp); - const SCEV *V = SE->getSCEV(Ptr); - - if (Ptr != OrigPtr) - // Strip off casts. - while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) - V = C->getOperand(); - - const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V); - if (!S) - return nullptr; - - V = S->getStepRecurrence(*SE); - if (!V) - return nullptr; - - // Strip off the size of access multiplication if we are still analyzing the - // pointer. - if (OrigPtr == Ptr) { - const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout(); - DL.getTypeAllocSize(PtrTy->getElementType()); - if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) { - if (M->getOperand(0)->getSCEVType() != scConstant) - return nullptr; - - const APInt &APStepVal = - cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue(); - - // Huge step value - give up. - if (APStepVal.getBitWidth() > 64) - return nullptr; - - int64_t StepVal = APStepVal.getSExtValue(); - if (PtrAccessSize != StepVal) - return nullptr; - V = M->getOperand(1); - } - } - - // Strip off casts. - Type *StripedOffRecurrenceCast = nullptr; - if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) { - StripedOffRecurrenceCast = C->getType(); - V = C->getOperand(); - } - - // Look for the loop invariant symbolic value. - const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V); - if (!U) - return nullptr; - - Value *Stride = U->getValue(); - if (!Lp->isLoopInvariant(Stride)) - return nullptr; - - // If we have stripped off the recurrence cast we have to make sure that we - // return the value that is used in this loop so that we can replace it later. 
- if (StripedOffRecurrenceCast) - Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast); - - return Stride; -} - void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) { Value *Ptr = nullptr; if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess)) @@ -4585,7 +4450,7 @@ LoopVectorizationCostModel::VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { // Width 1 means no vectorize VectorizationFactor Factor = { 1U, 0U }; - if (OptForSize && Legal->getRuntimePointerCheck()->Need) { + if (OptForSize && Legal->getRuntimePointerChecking()->Need) { emitAnalysis(VectorizationReport() << "runtime pointer checks needed. Enable vectorization of this " "loop with '#pragma clang loop vectorize(enable)' when " @@ -4745,41 +4610,40 @@ unsigned LoopVectorizationCostModel::getWidestType() { return MaxWidth; } -unsigned -LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, - unsigned VF, - unsigned LoopCost) { +unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, + unsigned VF, + unsigned LoopCost) { - // -- The unroll heuristics -- - // We unroll the loop in order to expose ILP and reduce the loop overhead. + // -- The interleave heuristics -- + // We interleave the loop in order to expose ILP and reduce the loop overhead. // There are many micro-architectural considerations that we can't predict // at this level. For example, frontend pressure (on decode or fetch) due to // code size, or the number and capabilities of the execution ports. // - // We use the following heuristics to select the unroll factor: - // 1. If the code has reductions, then we unroll in order to break the cross + // We use the following heuristics to select the interleave count: + // 1. If the code has reductions, then we interleave to break the cross // iteration dependency. - // 2. If the loop is really small, then we unroll in order to reduce the loop + // 2. If the loop is really small, then we interleave to reduce the loop // overhead. - // 3. We don't unroll if we think that we will spill registers to memory due - // to the increased register pressure. + // 3. We don't interleave if we think that we will spill registers to memory + // due to the increased register pressure. // Use the user preference, unless 'auto' is selected. int UserUF = Hints->getInterleave(); if (UserUF != 0) return UserUF; - // When we optimize for size, we don't unroll. + // When we optimize for size, we don't interleave. if (OptForSize) return 1; - // We used the distance for the unroll factor. + // We used the distance for the interleave count. if (Legal->getMaxSafeDepDistBytes() != -1U) return 1; - // Do not unroll loops with a relatively small trip count. + // Do not interleave loops with a relatively small trip count. unsigned TC = SE->getSmallConstantTripCount(TheLoop); - if (TC > 1 && TC < TinyTripCountUnrollThreshold) + if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1); @@ -4800,32 +4664,32 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); R.NumInstructions = std::max(R.NumInstructions, 1U); - // We calculate the unroll factor using the following formula. + // We calculate the interleave count using the following formula. // Subtract the number of loop invariants from the number of available - // registers. These registers are used by all of the unrolled instances. + // registers. 
These registers are used by all of the interleaved instances. // Next, divide the remaining registers by the number of registers that is // required by the loop, in order to estimate how many parallel instances // fit without causing spills. All of this is rounded down if necessary to be - // a power of two. We want power of two unroll factors to simplify any + // a power of two. We want power of two interleave count to simplify any // addressing operations or alignment considerations. - unsigned UF = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) / + unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) / R.MaxLocalUsers); - // Don't count the induction variable as unrolled. + // Don't count the induction variable as interleaved. if (EnableIndVarRegisterHeur) - UF = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / + IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) / std::max(1U, (R.MaxLocalUsers - 1))); - // Clamp the unroll factor ranges to reasonable factors. - unsigned MaxInterleaveSize = TTI.getMaxInterleaveFactor(VF); + // Clamp the interleave ranges to reasonable counts. + unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF); - // Check if the user has overridden the unroll max. + // Check if the user has overridden the max. if (VF == 1) { if (ForceTargetMaxScalarInterleaveFactor.getNumOccurrences() > 0) - MaxInterleaveSize = ForceTargetMaxScalarInterleaveFactor; + MaxInterleaveCount = ForceTargetMaxScalarInterleaveFactor; } else { if (ForceTargetMaxVectorInterleaveFactor.getNumOccurrences() > 0) - MaxInterleaveSize = ForceTargetMaxVectorInterleaveFactor; + MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor; } // If we did not calculate the cost for VF (because the user selected the VF) @@ -4833,72 +4697,74 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize, if (LoopCost == 0) LoopCost = expectedCost(VF); - // Clamp the calculated UF to be between the 1 and the max unroll factor + // Clamp the calculated IC to be between the 1 and the max interleave count // that the target allows. - if (UF > MaxInterleaveSize) - UF = MaxInterleaveSize; - else if (UF < 1) - UF = 1; + if (IC > MaxInterleaveCount) + IC = MaxInterleaveCount; + else if (IC < 1) + IC = 1; - // Unroll if we vectorized this loop and there is a reduction that could - // benefit from unrolling. + // Interleave if we vectorized this loop and there is a reduction that could + // benefit from interleaving. if (VF > 1 && Legal->getReductionVars()->size()) { - DEBUG(dbgs() << "LV: Unrolling because of reductions.\n"); - return UF; + DEBUG(dbgs() << "LV: Interleaving because of reductions.\n"); + return IC; } // Note that if we've already vectorized the loop we will have done the - // runtime check and so unrolling won't require further checks. - bool UnrollingRequiresRuntimePointerCheck = - (VF == 1 && Legal->getRuntimePointerCheck()->Need); + // runtime check and so interleaving won't require further checks. + bool InterleavingRequiresRuntimePointerCheck = + (VF == 1 && Legal->getRuntimePointerChecking()->Need); - // We want to unroll small loops in order to reduce the loop overhead and + // We want to interleave small loops in order to reduce the loop overhead and // potentially expose ILP opportunities. 
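To make the register-pressure formula above concrete (the numbers are made up for illustration, not taken from any real target): with 16 available registers, 2 loop-invariant values and a peak per-iteration pressure of 4 registers, the base formula gives PowerOf2Floor((16 - 2) / 4) = PowerOf2Floor(3) = 2, while with the induction-variable heuristic enabled the induction variable is discounted from both the remaining registers and the per-iteration pressure, giving PowerOf2Floor((16 - 2 - 1) / 3) = PowerOf2Floor(4) = 4. The computed value is then clamped to the range [1, TTI.getMaxInterleaveFactor(VF)] before the small-loop and load/store port-saturation adjustments that follow below.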
DEBUG(dbgs() << "LV: Loop cost is " << LoopCost << '\n'); - if (!UnrollingRequiresRuntimePointerCheck && - LoopCost < SmallLoopCost) { + if (!InterleavingRequiresRuntimePointerCheck && LoopCost < SmallLoopCost) { // We assume that the cost overhead is 1 and we use the cost model - // to estimate the cost of the loop and unroll until the cost of the + // to estimate the cost of the loop and interleave until the cost of the // loop overhead is about 5% of the cost of the loop. - unsigned SmallUF = std::min(UF, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost)); + unsigned SmallIC = + std::min(IC, (unsigned)PowerOf2Floor(SmallLoopCost / LoopCost)); - // Unroll until store/load ports (estimated by max unroll factor) are + // Interleave until store/load ports (estimated by max interleave count) are // saturated. unsigned NumStores = Legal->getNumStores(); unsigned NumLoads = Legal->getNumLoads(); - unsigned StoresUF = UF / (NumStores ? NumStores : 1); - unsigned LoadsUF = UF / (NumLoads ? NumLoads : 1); + unsigned StoresIC = IC / (NumStores ? NumStores : 1); + unsigned LoadsIC = IC / (NumLoads ? NumLoads : 1); // If we have a scalar reduction (vector reductions are already dealt with // by this point), we can increase the critical path length if the loop - // we're unrolling is inside another loop. Limit, by default to 2, so the + // we're interleaving is inside another loop. Limit, by default to 2, so the // critical path only gets increased by one reduction operation. if (Legal->getReductionVars()->size() && TheLoop->getLoopDepth() > 1) { - unsigned F = static_cast<unsigned>(MaxNestedScalarReductionUF); - SmallUF = std::min(SmallUF, F); - StoresUF = std::min(StoresUF, F); - LoadsUF = std::min(LoadsUF, F); + unsigned F = static_cast<unsigned>(MaxNestedScalarReductionIC); + SmallIC = std::min(SmallIC, F); + StoresIC = std::min(StoresIC, F); + LoadsIC = std::min(LoadsIC, F); } - if (EnableLoadStoreRuntimeUnroll && std::max(StoresUF, LoadsUF) > SmallUF) { - DEBUG(dbgs() << "LV: Unrolling to saturate store or load ports.\n"); - return std::max(StoresUF, LoadsUF); + if (EnableLoadStoreRuntimeInterleave && + std::max(StoresIC, LoadsIC) > SmallIC) { + DEBUG(dbgs() << "LV: Interleaving to saturate store or load ports.\n"); + return std::max(StoresIC, LoadsIC); } - DEBUG(dbgs() << "LV: Unrolling to reduce branch cost.\n"); - return SmallUF; + DEBUG(dbgs() << "LV: Interleaving to reduce branch cost.\n"); + return SmallIC; } - // Unroll if this is a large loop (small loops are already dealt with by this - // point) that could benefit from interleaved unrolling. + // Interleave if this is a large loop (small loops are already dealt with by + // this + // point) that could benefit from interleaving. 
bool HasReductions = (Legal->getReductionVars()->size() > 0); if (TTI.enableAggressiveInterleaving(HasReductions)) { - DEBUG(dbgs() << "LV: Unrolling to expose ILP.\n"); - return UF; + DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n"); + return IC; } - DEBUG(dbgs() << "LV: Not Unrolling.\n"); + DEBUG(dbgs() << "LV: Not Interleaving.\n"); return 1; } diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7c4c279dcf4d..7bac407e77e9 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -69,8 +69,13 @@ static cl::opt<bool> ShouldStartVectorizeHorAtStore( cl::desc( "Attempt to vectorize horizontal reductions feeding into a store")); +static cl::opt<int> +MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden, + cl::desc("Attempt to vectorize for this register size in bits")); + namespace { +// FIXME: Set this via cl::opt to allow overriding. static const unsigned MinVecRegSize = 128; static const unsigned RecursionMaxDepth = 12; @@ -2136,9 +2141,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } // Prepare the operand vector. - for (unsigned j = 0; j < E->Scalars.size(); ++j) - Operands.push_back(cast<PHINode>(E->Scalars[j])-> - getIncomingValueForBlock(IBB)); + for (Value *V : E->Scalars) + Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(IBB)); Builder.SetInsertPoint(IBB->getTerminator()); Builder.SetCurrentDebugLocation(PH->getDebugLoc()); @@ -2172,8 +2176,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::FPTrunc: case Instruction::BitCast: { ValueList INVL; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) - INVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); + for (Value *V : E->Scalars) + INVL.push_back(cast<Instruction>(V)->getOperand(0)); setInsertPointAfterBundle(E->Scalars); @@ -2191,9 +2195,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { case Instruction::FCmp: case Instruction::ICmp: { ValueList LHSV, RHSV; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { - LHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); - RHSV.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); + for (Value *V : E->Scalars) { + LHSV.push_back(cast<Instruction>(V)->getOperand(0)); + RHSV.push_back(cast<Instruction>(V)->getOperand(1)); } setInsertPointAfterBundle(E->Scalars); @@ -2217,10 +2221,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } case Instruction::Select: { ValueList TrueVec, FalseVec, CondVec; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { - CondVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); - TrueVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); - FalseVec.push_back(cast<Instruction>(E->Scalars[i])->getOperand(2)); + for (Value *V : E->Scalars) { + CondVec.push_back(cast<Instruction>(V)->getOperand(0)); + TrueVec.push_back(cast<Instruction>(V)->getOperand(1)); + FalseVec.push_back(cast<Instruction>(V)->getOperand(2)); } setInsertPointAfterBundle(E->Scalars); @@ -2259,9 +2263,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) reorderInputsAccordingToOpcode(E->Scalars, LHSVL, RHSVL); else - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { - LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0)); - RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1)); + for (Value *V : E->Scalars) { + LHSVL.push_back(cast<Instruction>(V)->getOperand(0)); + 
RHSVL.push_back(cast<Instruction>(V)->getOperand(1)); } setInsertPointAfterBundle(E->Scalars); @@ -2322,8 +2326,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { unsigned AS = SI->getPointerAddressSpace(); ValueList ValueOp; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) - ValueOp.push_back(cast<StoreInst>(E->Scalars[i])->getValueOperand()); + for (Value *V : E->Scalars) + ValueOp.push_back(cast<StoreInst>(V)->getValueOperand()); setInsertPointAfterBundle(E->Scalars); @@ -2351,8 +2355,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { setInsertPointAfterBundle(E->Scalars); ValueList Op0VL; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) - Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0)); + for (Value *V : E->Scalars) + Op0VL.push_back(cast<GetElementPtrInst>(V)->getOperand(0)); Value *Op0 = vectorizeTree(Op0VL); @@ -2360,8 +2364,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e; ++j) { ValueList OpVL; - for (int i = 0, e = E->Scalars.size(); i < e; ++i) - OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j)); + for (Value *V : E->Scalars) + OpVL.push_back(cast<GetElementPtrInst>(V)->getOperand(j)); Value *OpVec = vectorizeTree(OpVL); OpVecs.push_back(OpVec); @@ -2397,8 +2401,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { OpVecs.push_back(CEI->getArgOperand(j)); continue; } - for (int i = 0, e = E->Scalars.size(); i < e; ++i) { - CallInst *CEI = cast<CallInst>(E->Scalars[i]); + for (Value *V : E->Scalars) { + CallInst *CEI = cast<CallInst>(V); OpVL.push_back(CEI->getArgOperand(j)); } @@ -3089,6 +3093,17 @@ struct SLPVectorizer : public FunctionPass { if (!TTI->getNumberOfRegisters(true)) return false; + // Use the vector register size specified by the target unless overridden + // by a command-line option. + // TODO: It would be better to limit the vectorization factor based on + // data type rather than just register size. For example, x86 AVX has + // 256-bit registers, but it does not support integer operations + // at that width (that requires AVX2). + if (MaxVectorRegSizeOption.getNumOccurrences()) + MaxVecRegSize = MaxVectorRegSizeOption; + else + MaxVecRegSize = TTI->getRegisterBitWidth(true); + // Don't vectorize when the attribute NoImplicitFloat is used. if (F.hasFnAttribute(Attribute::NoImplicitFloat)) return false; @@ -3166,12 +3181,13 @@ private: bool vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R); bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold, - BoUpSLP &R); + BoUpSLP &R, unsigned VecRegSize); bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold, BoUpSLP &R); private: StoreListMap StoreRefs; + unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt. 
}; /// \brief Check that the Values in the slice in VL array are still existent in @@ -3186,14 +3202,15 @@ static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH, } bool SLPVectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain, - int CostThreshold, BoUpSLP &R) { + int CostThreshold, BoUpSLP &R, + unsigned VecRegSize) { unsigned ChainLen = Chain.size(); DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen << "\n"); Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType(); auto &DL = cast<StoreInst>(Chain[0])->getModule()->getDataLayout(); unsigned Sz = DL.getTypeSizeInBits(StoreTy); - unsigned VF = MinVecRegSize / Sz; + unsigned VF = VecRegSize / Sz; if (!isPowerOf2_32(Sz) || VF < 2) return false; @@ -3277,12 +3294,16 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores, I = ConsecutiveChain[I]; } - bool Vectorized = vectorizeStoreChain(Operands, costThreshold, R); - - // Mark the vectorized stores so that we don't vectorize them again. - if (Vectorized) - VectorizedStores.insert(Operands.begin(), Operands.end()); - Changed |= Vectorized; + // FIXME: Is division-by-2 the correct step? Should we assert that the + // register size is a power-of-2? + for (unsigned Size = MaxVecRegSize; Size >= MinVecRegSize; Size /= 2) { + if (vectorizeStoreChain(Operands, costThreshold, R, Size)) { + // Mark the vectorized stores so that we don't vectorize them again. + VectorizedStores.insert(Operands.begin(), Operands.end()); + Changed = true; + break; + } + } } return Changed; @@ -3293,8 +3314,8 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) { unsigned count = 0; StoreRefs.clear(); const DataLayout &DL = BB->getModule()->getDataLayout(); - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - StoreInst *SI = dyn_cast<StoreInst>(it); + for (Instruction &I : *BB) { + StoreInst *SI = dyn_cast<StoreInst>(&I); if (!SI) continue; @@ -3342,13 +3363,15 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R, Type *Ty0 = I0->getType(); unsigned Sz = DL.getTypeSizeInBits(Ty0); + // FIXME: Register size should be a parameter to this function, so we can + // try different vectorization factors. unsigned VF = MinVecRegSize / Sz; - for (int i = 0, e = VL.size(); i < e; ++i) { - Type *Ty = VL[i]->getType(); + for (Value *V : VL) { + Type *Ty = V->getType(); if (!isValidElementType(Ty)) return false; - Instruction *Inst = dyn_cast<Instruction>(VL[i]); + Instruction *Inst = dyn_cast<Instruction>(V); if (!Inst || Inst->getOpcode() != Opcode0) return false; } @@ -3571,6 +3594,8 @@ public: const DataLayout &DL = B->getModule()->getDataLayout(); ReductionOpcode = B->getOpcode(); ReducedValueOpcode = 0; + // FIXME: Register size should be a parameter to this function, so we can + // try different vectorization factors. ReduxWidth = MinVecRegSize / DL.getTypeSizeInBits(Ty); ReductionRoot = B; ReductionPHI = Phi; @@ -3997,6 +4022,9 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) { << it->second.size() << ".\n"); // Process the stores in chunks of 16. + // TODO: The limit of 16 inhibits greater vectorization factors. + // For example, AVX2 supports v32i8. Increasing this limit, however, + // may cause a significant compile-time increase. for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) { unsigned Len = std::min<unsigned>(CE - CI, 16); Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len), |
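A brief usage note, separate from the diff itself: the new slp-max-reg-size option lets the SLP vectorizer's assumed register width be overridden from the command line, for example with an invocation along the lines of opt -S -slp-vectorizer -slp-max-reg-size=256 input.ll -o output.ll (the surrounding pass pipeline in this example is illustrative, not mandated by the patch); when the flag is not given, the width falls back to TTI->getRegisterBitWidth(true) as shown above.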