| field | value | timestamp |
|---|---|---|
| author | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
| commit | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch) | |
| tree | 4adf86a776049cbf7f69a1929c4babcbbef925eb /llvm/lib/CodeGen | |
| parent | 7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff) | |
Diffstat (limited to 'llvm/lib/CodeGen')
192 files changed, 10533 insertions, 5252 deletions
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp index 4f24f077d120..1632895fe5fa 100644 --- a/llvm/lib/CodeGen/Analysis.cpp +++ b/llvm/lib/CodeGen/Analysis.cpp @@ -262,7 +262,7 @@ static bool isNoopBitcast(Type *T1, Type *T2, /// Look through operations that will be free to find the earliest source of /// this value. /// -/// @param ValLoc If V has aggegate type, we will be interested in a particular +/// @param ValLoc If V has aggregate type, we will be interested in a particular /// scalar component. This records its address; the reverse of this list gives a /// sequence of indices appropriate for an extractvalue to locate the important /// value. This value is updated during the function and on exit will indicate @@ -567,12 +567,16 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(), AttributeList::ReturnIndex); - // NoAlias and NonNull are completely benign as far as calling convention + // Following attributes are completely benign as far as calling convention // goes, they shouldn't affect whether the call is a tail call. CallerAttrs.removeAttribute(Attribute::NoAlias); CalleeAttrs.removeAttribute(Attribute::NoAlias); CallerAttrs.removeAttribute(Attribute::NonNull); CalleeAttrs.removeAttribute(Attribute::NonNull); + CallerAttrs.removeAttribute(Attribute::Dereferenceable); + CalleeAttrs.removeAttribute(Attribute::Dereferenceable); + CallerAttrs.removeAttribute(Attribute::DereferenceableOrNull); + CalleeAttrs.removeAttribute(Attribute::DereferenceableOrNull); if (CallerAttrs.contains(Attribute::ZExt)) { if (!CalleeAttrs.contains(Attribute::ZExt)) @@ -611,6 +615,22 @@ bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I, return CallerAttrs == CalleeAttrs; } +/// Check whether B is a bitcast of a pointer type to another pointer type, +/// which is equal to A. 
+static bool isPointerBitcastEqualTo(const Value *A, const Value *B) { + assert(A && B && "Expected non-null inputs!"); + + auto *BitCastIn = dyn_cast<BitCastInst>(B); + + if (!BitCastIn) + return false; + + if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy()) + return false; + + return A == BitCastIn->getOperand(0); +} + bool llvm::returnTypeIsEligibleForTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, @@ -643,7 +663,8 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F, TLI.getLibcallName(RTLIB::MEMMOVE) == StringRef("memmove")) || (IID == Intrinsic::memset && TLI.getLibcallName(RTLIB::MEMSET) == StringRef("memset"))) && - RetVal == Call->getArgOperand(0)) + (RetVal == Call->getArgOperand(0) || + isPointerBitcastEqualTo(RetVal, Call->getArgOperand(0)))) return true; } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 73c53d6c4af5..6f9aa4dd79fd 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -31,13 +31,16 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/BinaryFormat/COFF.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -52,6 +55,7 @@ #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -81,7 +85,6 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodePadder.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCDwarf.h" @@ -139,18 +142,13 @@ static const char *const DbgTimerDescription = "Debug Info Emission"; static const char *const EHTimerName = "write_exception"; static const char *const EHTimerDescription = "DWARF Exception Writer"; static const char *const CFGuardName = "Control Flow Guard"; -static const char *const CFGuardDescription = "Control Flow Guard Tables"; +static const char *const CFGuardDescription = "Control Flow Guard"; static const char *const CodeViewLineTablesGroupName = "linetables"; static const char *const CodeViewLineTablesGroupDescription = "CodeView Line Tables"; STATISTIC(EmittedInsts, "Number of machine instrs printed"); -static cl::opt<bool> EnableRemarksSection( - "remarks-section", - cl::desc("Emit a section containing remark diagnostics metadata"), - cl::init(false)); - char AsmPrinter::ID = 0; using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>; @@ -253,6 +251,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfoWrapperPass>(); AU.addRequired<MachineOptimizationRemarkEmitterPass>(); AU.addRequired<GCModuleInfo>(); + AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); + 
AU.addRequired<ProfileSummaryInfoWrapperPass>(); } bool AsmPrinter::doInitialization(Module &M) { @@ -381,12 +381,12 @@ bool AsmPrinter::doInitialization(Module &M) { EHTimerDescription, DWARFGroupName, DWARFGroupDescription); + // Emit tables for any value of cfguard flag (i.e. cfguard=1 or cfguard=2). if (mdconst::extract_or_null<ConstantInt>( - MMI->getModule()->getModuleFlag("cfguardtable"))) + MMI->getModule()->getModuleFlag("cfguard"))) Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName, CFGuardDescription, DWARFGroupName, DWARFGroupDescription); - return false; } @@ -879,6 +879,10 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { OS << MI->getOperand(0).getImm(); } else if (MI->getOperand(0).isCImm()) { MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); + } else if (MI->getOperand(0).isTargetIndex()) { + auto Op = MI->getOperand(0); + OS << "!target-index(" << Op.getIndex() << "," << Op.getOffset() << ")"; + return true; } else { unsigned Reg; if (MI->getOperand(0).isReg()) { @@ -940,7 +944,7 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const { MF->getFunction().needsUnwindTableEntry()) return CFI_M_EH; - if (MMI->hasDebugInfo()) + if (MMI->hasDebugInfo() || MF->getTarget().Options.ForceDwarfFrameSection) return CFI_M_Debug; return CFI_M_None; @@ -1065,13 +1069,9 @@ void AsmPrinter::EmitFunctionBody() { ++NumInstsInFunction; } - // If there is a pre-instruction symbol, emit a label for it here. If the - // instruction was duplicated and the label has already been emitted, - // don't re-emit the same label. - // FIXME: Consider strengthening that to an assertion. + // If there is a pre-instruction symbol, emit a label for it here. if (MCSymbol *S = MI.getPreInstrSymbol()) - if (S->isUndefined()) - OutStreamer->EmitLabel(S); + OutStreamer->EmitLabel(S); if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { @@ -1124,13 +1124,9 @@ void AsmPrinter::EmitFunctionBody() { break; } - // If there is a post-instruction symbol, emit a label for it here. If - // the instruction was duplicated and the label has already been emitted, - // don't re-emit the same label. - // FIXME: Consider strengthening that to an assertion. + // If there is a post-instruction symbol, emit a label for it here. if (MCSymbol *S = MI.getPostInstrSymbol()) - if (S->isUndefined()) - OutStreamer->EmitLabel(S); + OutStreamer->EmitLabel(S); if (ShouldPrintDebugScopes) { for (const HandlerInfo &HI : Handlers) { @@ -1226,6 +1222,8 @@ void AsmPrinter::EmitFunctionBody() { // Emit section containing stack size metadata. 
emitStackSizeSection(*MF); + emitPatchableFunctionEntries(); + if (isVerbose()) OutStreamer->GetCommentOS() << "-- End function\n"; @@ -1365,14 +1363,14 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M, } } -void AsmPrinter::emitRemarksSection(Module &M) { - RemarkStreamer *RS = M.getContext().getRemarkStreamer(); - if (!RS) +void AsmPrinter::emitRemarksSection(RemarkStreamer &RS) { + if (!RS.needsSection()) return; - remarks::RemarkSerializer &RemarkSerializer = RS->getSerializer(); + + remarks::RemarkSerializer &RemarkSerializer = RS.getSerializer(); Optional<SmallString<128>> Filename; - if (Optional<StringRef> FilenameRef = RS->getFilename()) { + if (Optional<StringRef> FilenameRef = RS.getFilename()) { Filename = *FilenameRef; sys::fs::make_absolute(*Filename); assert(!Filename->empty() && "The filename can't be empty."); @@ -1385,7 +1383,7 @@ void AsmPrinter::emitRemarksSection(Module &M) { : RemarkSerializer.metaSerializer(OS); MetaSerializer->emit(); - // Switch to the right section: .remarks/__remarks. + // Switch to the remarks section. MCSection *RemarksSection = OutContext.getObjectFileInfo()->getRemarksSection(); OutStreamer->SwitchSection(RemarksSection); @@ -1427,8 +1425,8 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit the remarks section contents. // FIXME: Figure out when is the safest time to emit this section. It should // not come after debug info. - if (EnableRemarksSection) - emitRemarksSection(M); + if (RemarkStreamer *RS = M.getContext().getRemarkStreamer()) + emitRemarksSection(*RS); const TargetLoweringObjectFile &TLOF = getObjFileLowering(); @@ -1503,8 +1501,6 @@ bool AsmPrinter::doFinalization(Module &M) { } } - OutStreamer->AddBlankLine(); - // Print aliases in topological order, that is, for each alias a = b, // b must be printed before a. // This is because on some targets (e.g. PowerPC) linker expects aliases in @@ -1666,6 +1662,7 @@ MCSymbol *AsmPrinter::getCurExceptionSym() { void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; + const Function &F = MF.getFunction(); // Get the function symbol. if (MAI->needsFunctionDescriptors()) { @@ -1678,7 +1675,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnSym = OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName()); - const Function &F = MF.getFunction(); MCSectionXCOFF *FnEntryPointSec = cast<MCSectionXCOFF>(getObjFileLowering().SectionForGlobal(&F, TM)); // Set the containing csect. @@ -1691,7 +1687,8 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurrentFnBegin = nullptr; CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); - if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize || + if (F.hasFnAttribute("patchable-function-entry") || + needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize || MF.getTarget().Options.EmitStackSizeSection) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) @@ -1699,6 +1696,13 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { } ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + MBFI = (PSI && PSI->hasProfileSummary()) ? + // ORE conditionally computes MBFI. If available, use it, otherwise + // request it. + (ORE->getBFI() ? 
ORE->getBFI() : + &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()) : + nullptr; } namespace { @@ -1769,6 +1773,11 @@ void AsmPrinter::EmitConstantPool() { if (!Sym->isUndefined()) continue; + if (TM.getTargetTriple().isOSBinFormatXCOFF()) { + cast<MCSymbolXCOFF>(Sym)->setContainingCsect( + cast<MCSectionXCOFF>(CPSections[i].S)); + } + if (CurSection != CPSections[i].S) { OutStreamer->SwitchSection(CPSections[i].S); EmitAlignment(Align(CPSections[i].Alignment)); @@ -1858,10 +1867,16 @@ void AsmPrinter::EmitJumpTableInfo() { // second label is actually referenced by the code. if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix()) // FIXME: This doesn't have to have any specific name, just any randomly - // named and numbered 'l' label would work. Simplify GetJTISymbol. + // named and numbered local label started with 'l' would work. Simplify + // GetJTISymbol. OutStreamer->EmitLabel(GetJTISymbol(JTI, true)); - OutStreamer->EmitLabel(GetJTISymbol(JTI)); + MCSymbol* JTISymbol = GetJTISymbol(JTI); + if (TM.getTargetTriple().isOSBinFormatXCOFF()) { + cast<MCSymbolXCOFF>(JTISymbol)->setContainingCsect( + cast<MCSectionXCOFF>(TLOF.getSectionForJumpTable(F, TM))); + } + OutStreamer->EmitLabel(JTISymbol); for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); @@ -2914,19 +2929,6 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, PrintChildLoopComment(OS, Loop, AP.getFunctionNumber()); } -void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, - MCCodePaddingContext &Context) const { - assert(MF != nullptr && "Machine function must be valid"); - Context.IsPaddingActive = !MF->hasInlineAsm() && - !MF->getFunction().hasOptSize() && - TM.getOptLevel() != CodeGenOpt::None; - Context.IsBasicBlockReachableViaFallthrough = - std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) != - MBB.pred_end(); - Context.IsBasicBlockReachableViaBranch = - MBB.pred_size() > 0 && !isBlockOnlyReachableByFallthrough(&MBB); -} - /// EmitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. @@ -2943,9 +2945,6 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { const Align Alignment = MBB.getAlignment(); if (Alignment != Align::None()) EmitAlignment(Alignment); - MCCodePaddingContext Context; - setupCodePaddingContext(MBB, Context); - OutStreamer->EmitCodePaddingBasicBlockStart(Context); // If the block has its address taken, emit any labels that were used to // reference the block. 
It is possible that there is more than one label @@ -2993,11 +2992,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { } } -void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) { - MCCodePaddingContext Context; - setupCodePaddingContext(MBB, Context); - OutStreamer->EmitCodePaddingBasicBlockEnd(Context); -} +void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) {} void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition) const { @@ -3202,6 +3197,41 @@ void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, AlwaysInstrument, &F, Version}); } +void AsmPrinter::emitPatchableFunctionEntries() { + const Function &F = MF->getFunction(); + if (!F.hasFnAttribute("patchable-function-entry")) + return; + const unsigned PointerSize = getPointerSize(); + if (TM.getTargetTriple().isOSBinFormatELF()) { + auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC; + + // As of binutils 2.33, GNU as does not support section flag "o" or linkage + // field "unique". Use SHF_LINK_ORDER if we are using the integrated + // assembler. + if (MAI->useIntegratedAssembler()) { + Flags |= ELF::SHF_LINK_ORDER; + std::string GroupName; + if (F.hasComdat()) { + Flags |= ELF::SHF_GROUP; + GroupName = F.getComdat()->getName(); + } + MCSection *Section = getObjFileLowering().SectionForGlobal(&F, TM); + unsigned UniqueID = + PatchableFunctionEntryID + .try_emplace(Section, PatchableFunctionEntryID.size()) + .first->second; + OutStreamer->SwitchSection(OutContext.getELFSection( + "__patchable_function_entries", ELF::SHT_PROGBITS, Flags, 0, + GroupName, UniqueID, cast<MCSymbolELF>(CurrentFnSym))); + } else { + OutStreamer->SwitchSection(OutContext.getELFSection( + "__patchable_function_entries", ELF::SHT_PROGBITS, Flags)); + } + EmitAlignment(Align(PointerSize)); + OutStreamer->EmitSymbolValue(CurrentFnBegin, PointerSize); + } +} + uint16_t AsmPrinter::getDwarfVersion() const { return OutStreamer->getContext().getDwarfVersion(); } diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 420df26a2b8b..c631cc5360b8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -207,11 +207,17 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, } if (Done) break; + bool HasCurlyBraces = false; + if (*LastEmitted == '{') { // ${variable} + ++LastEmitted; // Consume '{' character. + HasCurlyBraces = true; + } + // If we have ${:foo}, then this is not a real operand reference, it is a // "magic" string reference, just like in .td files. Arrange to call // PrintSpecial. - if (LastEmitted[0] == '{' && LastEmitted[1] == ':') { - LastEmitted += 2; + if (HasCurlyBraces && LastEmitted[0] == ':') { + ++LastEmitted; const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); if (!StrEnd) @@ -238,6 +244,27 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, report_fatal_error("Invalid $ operand number in inline asm string: '" + Twine(AsmStr) + "'"); + char Modifier[2] = { 0, 0 }; + + if (HasCurlyBraces) { + // If we have curly braces, check for a modifier character. This + // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. + if (*LastEmitted == ':') { + ++LastEmitted; // Consume ':' character. 
+ if (*LastEmitted == 0) + report_fatal_error("Bad ${:} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + + Modifier[0] = *LastEmitted; + ++LastEmitted; // Consume modifier character. + } + + if (*LastEmitted != '}') + report_fatal_error("Bad ${} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + ++LastEmitted; // Consume '}' character. + } + // Okay, we finally have a value number. Ask the target to print this // operand! unsigned OpNo = InlineAsm::MIOp_FirstOperand; @@ -262,9 +289,11 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, ++OpNo; // Skip over the ID number. if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand(MI, OpNo, /*Modifier*/ nullptr, OS); + Error = AP->PrintAsmMemoryOperand( + MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); } else { - Error = AP->PrintAsmOperand(MI, OpNo, /*Modifier*/ nullptr, OS); + Error = AP->PrintAsmOperand(MI, OpNo, + Modifier[0] ? Modifier : nullptr, OS); } } if (Error) { @@ -427,26 +456,23 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, // FIXME: Shouldn't arch-independent output template handling go into // PrintAsmOperand? - if (Modifier[0] == 'l') { // Labels are target independent. - if (MI->getOperand(OpNo).isBlockAddress()) { - const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); - MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); - Sym->print(OS, AP->MAI); - MMI->getContext().registerInlineAsmLabel(Sym); - } else if (MI->getOperand(OpNo).isMBB()) { - const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); - Sym->print(OS, AP->MAI); - } else { - Error = true; - } + // Labels are target independent. + if (MI->getOperand(OpNo).isBlockAddress()) { + const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); + MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); + Sym->print(OS, AP->MAI); + MMI->getContext().registerInlineAsmLabel(Sym); + } else if (MI->getOperand(OpNo).isMBB()) { + const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); + Sym->print(OS, AP->MAI); + } else if (Modifier[0] == 'l') { + Error = true; + } else if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand( + MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); } else { - if (InlineAsm::isMemKind(OpFlags)) { - Error = AP->PrintAsmMemoryOperand( - MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); - } else { - Error = AP->PrintAsmOperand(MI, OpNo, - Modifier[0] ? Modifier : nullptr, OS); - } + Error = AP->PrintAsmOperand(MI, OpNo, + Modifier[0] ? Modifier : nullptr, OS); } } if (Error) { diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index c6457f3626d1..62ad356e7f8f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1100,14 +1100,8 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, } for (auto HeapAllocSite : FI.HeapAllocSites) { - MCSymbol *BeginLabel = std::get<0>(HeapAllocSite); - MCSymbol *EndLabel = std::get<1>(HeapAllocSite); - - // The labels might not be defined if the instruction was replaced - // somewhere in the codegen pipeline. 
- if (!BeginLabel->isDefined() || !EndLabel->isDefined()) - continue; - + const MCSymbol *BeginLabel = std::get<0>(HeapAllocSite); + const MCSymbol *EndLabel = std::get<1>(HeapAllocSite); const DIType *DITy = std::get<2>(HeapAllocSite); MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE); OS.AddComment("Call site offset"); @@ -1427,6 +1421,16 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); maybeRecordLocation(FnStartDL, MF); } + + // Find heap alloc sites and emit labels around them. + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (MI.getHeapAllocMarker()) { + requestLabelBeforeInsn(&MI); + requestLabelAfterInsn(&MI); + } + } + } } static bool shouldEmitUdt(const DIType *T) { @@ -2850,8 +2854,18 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { return; } + // Find heap alloc sites and add to list. + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (MDNode *MD = MI.getHeapAllocMarker()) { + CurFn->HeapAllocSites.push_back(std::make_tuple(getLabelBeforeInsn(&MI), + getLabelAfterInsn(&MI), + dyn_cast<DIType>(MD))); + } + } + } + CurFn->Annotations = MF->getCodeViewAnnotations(); - CurFn->HeapAllocSites = MF->getCodeViewHeapAllocSites(); CurFn->End = Asm->getFunctionEnd(); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 7ffd77926cf7..b56b9047e1a9 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -148,7 +148,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { SmallVector<LexicalBlock *, 1> ChildBlocks; std::vector<std::pair<MCSymbol *, MDNode *>> Annotations; - std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>> + std::vector<std::tuple<const MCSymbol *, const MCSymbol *, const DIType *>> HeapAllocSites; const MCSymbol *Begin = nullptr; diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp index f4134da48caa..84b86a71fa5f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -319,8 +319,10 @@ DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag) { Die.Owner = this; assert((UnitTag == dwarf::DW_TAG_compile_unit || + UnitTag == dwarf::DW_TAG_skeleton_unit || UnitTag == dwarf::DW_TAG_type_unit || - UnitTag == dwarf::DW_TAG_partial_unit) && "expected a unit TAG"); + UnitTag == dwarf::DW_TAG_partial_unit) && + "expected a unit TAG"); } void DIEValue::EmitValue(const AsmPrinter *AP) const { @@ -798,6 +800,8 @@ void DIEBlock::print(raw_ostream &O) const { //===----------------------------------------------------------------------===// unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_loclistx) + return getULEB128Size(Index); if (Form == dwarf::DW_FORM_data4) return 4; if (Form == dwarf::DW_FORM_sec_offset) @@ -808,6 +812,10 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { /// EmitValue - Emit label value. 
/// void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_loclistx) { + AP->EmitULEB128(Index); + return; + } DwarfDebug *DD = AP->getDwarfDebug(); MCSymbol *Label = DD->getDebugLocs().getList(Index).Label; AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp index 7f9d6c618ad3..170fc8b6d49f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -262,7 +262,9 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF, DbgLabels.addInstr(L, MI); } - if (MI.isDebugInstr()) + // Meta Instructions have no output and do not change any values and so + // can be safely ignored. + if (MI.isMetaInstruction()) continue; // Not a DBG_VALUE instruction. It may clobber registers which describe diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h index 17e39b3d3268..36278f2e9e2d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -20,13 +20,33 @@ namespace llvm { class AsmPrinter; +/// This struct describes target specific location. +struct TargetIndexLocation { + int Index; + int Offset; + + TargetIndexLocation() = default; + TargetIndexLocation(unsigned Idx, int64_t Offset) + : Index(Idx), Offset(Offset) {} + + bool operator==(const TargetIndexLocation &Other) const { + return Index == Other.Index && Offset == Other.Offset; + } +}; + /// A single location or constant. class DbgValueLoc { /// Any complex address location expression for this DbgValueLoc. const DIExpression *Expression; /// Type of entry that this represents. - enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; + enum EntryType { + E_Location, + E_Integer, + E_ConstantFP, + E_ConstantInt, + E_TargetIndexLocation + }; enum EntryType EntryKind; /// Either a constant, @@ -36,8 +56,12 @@ class DbgValueLoc { const ConstantInt *CIP; } Constant; - /// Or a location in the machine frame. - MachineLocation Loc; + union { + /// Or a location in the machine frame. + MachineLocation Loc; + /// Or a location from target specific location. 
+ TargetIndexLocation TIL; + }; public: DbgValueLoc(const DIExpression *Expr, int64_t i) @@ -56,8 +80,13 @@ public: : Expression(Expr), EntryKind(E_Location), Loc(Loc) { assert(cast<DIExpression>(Expr)->isValid()); } + DbgValueLoc(const DIExpression *Expr, TargetIndexLocation Loc) + : Expression(Expr), EntryKind(E_TargetIndexLocation), TIL(Loc) {} bool isLocation() const { return EntryKind == E_Location; } + bool isTargetIndexLocation() const { + return EntryKind == E_TargetIndexLocation; + } bool isInt() const { return EntryKind == E_Integer; } bool isConstantFP() const { return EntryKind == E_ConstantFP; } bool isConstantInt() const { return EntryKind == E_ConstantInt; } @@ -65,6 +94,7 @@ public: const ConstantFP *getConstantFP() const { return Constant.CFP; } const ConstantInt *getConstantInt() const { return Constant.CIP; } MachineLocation getLoc() const { return Loc; } + TargetIndexLocation getTargetIndexLocation() const { return TIL; } bool isFragment() const { return getExpression()->isFragment(); } bool isEntryVal() const { return getExpression()->isEntryValue(); } const DIExpression *getExpression() const { return Expression; } @@ -162,6 +192,8 @@ inline bool operator==(const DbgValueLoc &A, switch (A.EntryKind) { case DbgValueLoc::E_Location: return A.Loc == B.Loc; + case DbgValueLoc::E_TargetIndexLocation: + return A.TIL == B.TIL; case DbgValueLoc::E_Integer: return A.Constant.Int == B.Constant.Int; case DbgValueLoc::E_ConstantFP: diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp index f483d532ff07..8c6109880afc 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp @@ -42,4 +42,6 @@ DebugLocStream::ListBuilder::~ListBuilder() { return; V.initializeDbgValue(&MI); V.setDebugLocListIndex(ListIndex); + if (TagOffset) + V.setDebugLocListTagOffset(*TagOffset); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h index 0db86b09d19a..10019a4720e6 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -159,11 +159,17 @@ class DebugLocStream::ListBuilder { DbgVariable &V; const MachineInstr &MI; size_t ListIndex; + Optional<uint8_t> TagOffset; public: ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm, DbgVariable &V, const MachineInstr &MI) - : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)) {} + : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)), + TagOffset(None) {} + + void setTagOffset(uint8_t TO) { + TagOffset = TO; + } /// Finalize the list. 
/// diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 207a7284dafa..facbf22946e4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -133,6 +134,8 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, if (!hasEmittedCFISections) { if (Asm->needsOnlyDebugCFIMoves()) Asm->OutStreamer->EmitCFISections(false, true); + else if (Asm->TM.Options.ForceDwarfFrameSection) + Asm->OutStreamer->EmitCFISections(true, true); hasEmittedCFISections = true; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index a61c98ec1c18..38011102c7b3 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -52,10 +52,23 @@ using namespace llvm; +static dwarf::Tag GetCompileUnitType(UnitKind Kind, DwarfDebug *DW) { + + // According to DWARF Debugging Information Format Version 5, + // 3.1.2 Skeleton Compilation Unit Entries: + // "When generating a split DWARF object file (see Section 7.3.2 + // on page 187), the compilation unit in the .debug_info section + // is a "skeleton" compilation unit with the tag DW_TAG_skeleton_unit" + if (DW->getDwarfVersion() >= 5 && Kind == UnitKind::Skeleton) + return dwarf::DW_TAG_skeleton_unit; + + return dwarf::DW_TAG_compile_unit; +} + DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, - DwarfFile *DWU) - : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID) { + DwarfFile *DWU, UnitKind Kind) + : DwarfUnit(GetCompileUnitType(Kind, DW), Node, A, DW, DWU), UniqueID(UID) { insertDIE(Node, &getUnitDie()); MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin"); } @@ -65,10 +78,6 @@ DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label) { // Don't use the address pool in non-fission or in the skeleton unit itself. - // FIXME: Once GDB supports this, it's probably worthwhile using the address - // pool from the skeleton - maybe even in non-fission (possibly fewer - // relocations by sharing them in the pool, but we have other ideas about how - // to reduce the number of relocations as well/instead). 
if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5) return addLocalLabelAddress(Die, Attribute, Label); @@ -490,10 +499,10 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, const MCSymbol *RangeSectionSym = TLOF.getDwarfRangesSection()->getBeginSymbol(); if (isDwoUnit()) - addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.Label, RangeSectionSym); else - addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.Label, RangeSectionSym); } } @@ -602,6 +611,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, unsigned Offset = DV.getDebugLocListIndex(); if (Offset != ~0U) { addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); + auto TagOffset = DV.getDebugLocListTagOffset(); + if (TagOffset) + addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, + *TagOffset); return VariableDie; } @@ -619,6 +632,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, DwarfExpr.addUnsignedConstant(DVal->getInt()); DwarfExpr.addExpression(Expr); addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); + if (DwarfExpr.TagOffset) + addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, + dwarf::DW_FORM_data1, *DwarfExpr.TagOffset); + } else addConstantValue(*VariableDie, DVal->getInt(), DV.getType()); } else if (DVal->isConstantFP()) { @@ -951,8 +968,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE( addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), MachineLocation(CallReg)); } else { - DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); - assert(CalleeDIE && "Could not create DIE for call site entry origin"); + DIE *CalleeDIE = getDIE(CalleeSP); + assert(CalleeDIE && "Could not find DIE for call site entry origin"); addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), *CalleeDIE); } @@ -1185,6 +1202,10 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, // Now attach the location information to the DIE. addBlock(Die, Attribute, DwarfExpr.finalize()); + + if (DwarfExpr.TagOffset) + addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, + *DwarfExpr.TagOffset); } /// Start with the address based on the location provided, and generate the @@ -1215,13 +1236,20 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, // Now attach the location information to the DIE. addBlock(Die, Attribute, DwarfExpr.finalize()); + + if (DwarfExpr.TagOffset) + addUInt(Die, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, + *DwarfExpr.TagOffset); } /// Add a Dwarf loclistptr attribute data and value. void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index) { - dwarf::Form Form = DD->getDwarfVersion() >= 4 ? 
dwarf::DW_FORM_sec_offset - : dwarf::DW_FORM_data4; + dwarf::Form Form = dwarf::DW_FORM_data4; + if (DD->getDwarfVersion() == 4) + Form =dwarf::DW_FORM_sec_offset; + if (DD->getDwarfVersion() >= 5) + Form =dwarf::DW_FORM_loclistx; Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index)); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 1b7ea2673ac0..8491d078ed89 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -40,6 +40,8 @@ class MCExpr; class MCSymbol; class MDNode; +enum class UnitKind { Skeleton, Full }; + class DwarfCompileUnit final : public DwarfUnit { /// A numeric ID unique among all CUs in the module unsigned UniqueID; @@ -104,7 +106,8 @@ class DwarfCompileUnit final : public DwarfUnit { public: DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, - DwarfDebug *DW, DwarfFile *DWU); + DwarfDebug *DW, DwarfFile *DWU, + UnitKind Kind = UnitKind::Full); bool hasRangeLists() const { return HasRangeLists; } unsigned getUniqueID() const { return UniqueID; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c505e77e5acd..fa6800de7955 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -241,6 +241,11 @@ static DbgValueLoc getDebugLocValue(const MachineInstr *MI) { MachineLocation MLoc(RegOp.getReg(), Op1.isImm()); return DbgValueLoc(Expr, MLoc); } + if (MI->getOperand(0).isTargetIndex()) { + auto Op = MI->getOperand(0); + return DbgValueLoc(Expr, + TargetIndexLocation(Op.getIndex(), Op.getOffset())); + } if (MI->getOperand(0).isImm()) return DbgValueLoc(Expr, MI->getOperand(0).getImm()); if (MI->getOperand(0).isFPImm()) @@ -535,6 +540,14 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, } } +DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) { + DICompileUnit *Unit = SP->getUnit(); + assert(SP->isDefinition() && "Subprogram not a definition"); + assert(Unit && "Subprogram definition without parent unit"); + auto &CU = getOrCreateDwarfCompileUnit(Unit); + return *CU.getOrCreateSubprogramDIE(SP); +} + /// Try to interpret values loaded into registers that forward parameters /// for \p CallMI. Store parameters with interpreted value into \p Params. static void collectCallSiteParameters(const MachineInstr *CallMI, @@ -595,7 +608,6 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, Implicit.push_back(FwdReg); else Explicit.push_back(FwdReg); - break; } } } @@ -615,8 +627,12 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, ++NumCSParams; }; - // Search for a loading value in forwaring registers. + // Search for a loading value in forwarding registers. for (; I != MBB->rend(); ++I) { + // Skip bundle headers. + if (I->isBundle()) + continue; + // If the next instruction is a call we can not interpret parameter's // forwarding registers or we finished the interpretation of all parameters. 
if (I->isCall()) @@ -636,32 +652,33 @@ static void collectCallSiteParameters(const MachineInstr *CallMI, for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs)) ForwardedRegWorklist.erase(Reg); - // The describeLoadedValue() hook currently does not have any information - // about which register it should describe in case of multiple defines, so - // for now we only handle instructions where a forwarded register is (at - // least partially) defined by the instruction's single explicit define. - if (I->getNumExplicitDefs() != 1 || ExplicitFwdRegDefs.empty()) - continue; - unsigned Reg = ExplicitFwdRegDefs[0]; - - if (auto ParamValue = TII->describeLoadedValue(*I)) { - if (ParamValue->first.isImm()) { - int64_t Val = ParamValue->first.getImm(); - DbgValueLoc DbgLocVal(ParamValue->second, Val); - finishCallSiteParam(DbgLocVal, Reg); - } else if (ParamValue->first.isReg()) { - Register RegLoc = ParamValue->first.getReg(); - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - Register FP = TRI->getFrameRegister(*MF); - bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); - if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { - DbgValueLoc DbgLocVal(ParamValue->second, - MachineLocation(RegLoc, - /*IsIndirect=*/IsSPorFP)); - finishCallSiteParam(DbgLocVal, Reg); - } else if (ShouldTryEmitEntryVals) { - ForwardedRegWorklist.insert(RegLoc); - RegsForEntryValues[RegLoc] = Reg; + for (auto ParamFwdReg : ExplicitFwdRegDefs) { + if (auto ParamValue = TII->describeLoadedValue(*I, ParamFwdReg)) { + if (ParamValue->first.isImm()) { + int64_t Val = ParamValue->first.getImm(); + DbgValueLoc DbgLocVal(ParamValue->second, Val); + finishCallSiteParam(DbgLocVal, ParamFwdReg); + } else if (ParamValue->first.isReg()) { + Register RegLoc = ParamValue->first.getReg(); + // TODO: For now, there is no use of describing the value loaded into the + // register that is also the source registers (e.g. $r0 = add $r0, x). + if (ParamFwdReg == RegLoc) + continue; + + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register FP = TRI->getFrameRegister(*MF); + bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); + if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { + DbgValueLoc DbgLocVal(ParamValue->second, + MachineLocation(RegLoc, + /*IsIndirect=*/IsSPorFP)); + finishCallSiteParam(DbgLocVal, ParamFwdReg); + // TODO: Add support for entry value plus an expression. + } else if (ShouldTryEmitEntryVals && + ParamValue->second->getNumElements() == 0) { + ForwardedRegWorklist.insert(RegLoc); + RegsForEntryValues[RegLoc] = ParamFwdReg; + } } } } @@ -707,6 +724,12 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, // Emit call site entries for each call or tail call in the function. for (const MachineBasicBlock &MBB : MF) { for (const MachineInstr &MI : MBB.instrs()) { + // Bundles with call in them will pass the isCall() test below but do not + // have callee operand information so skip them here. Iterator will + // eventually reach the call MI. + if (MI.isBundle()) + continue; + // Skip instructions which aren't calls. Both calls and tail-calling jump // instructions (e.g TAILJMPd64) are classified correctly here. if (!MI.isCall()) @@ -735,25 +758,45 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, if (!CalleeDecl || !CalleeDecl->getSubprogram()) continue; CalleeSP = CalleeDecl->getSubprogram(); + + if (CalleeSP->isDefinition()) { + // Ensure that a subprogram DIE for the callee is available in the + // appropriate CU. 
+ constructSubprogramDefinitionDIE(CalleeSP); + } else { + // Create the declaration DIE if it is missing. This is required to + // support compilation of old bitcode with an incomplete list of + // retained metadata. + CU.getOrCreateSubprogramDIE(CalleeSP); + } } // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). bool IsTail = TII->isTailCall(MI); + // If MI is in a bundle, the label was created after the bundle since + // EmitFunctionBody iterates over top-level MIs. Get that top-level MI + // to search for that label below. + const MachineInstr *TopLevelCallMI = + MI.isInsideBundle() ? &*getBundleStart(MI.getIterator()) : &MI; + // For tail calls, for non-gdb tuning, no return PC information is needed. // For regular calls (and tail calls in GDB tuning), the return PC // is needed to disambiguate paths in the call graph which could lead to // some target function. const MCExpr *PCOffset = - (IsTail && !tuneForGDB()) ? nullptr - : getFunctionLocalOffsetAfterInsn(&MI); + (IsTail && !tuneForGDB()) + ? nullptr + : getFunctionLocalOffsetAfterInsn(TopLevelCallMI); - // Address of a call-like instruction for a normal call or a jump-like - // instruction for a tail call. This is needed for GDB + DWARF 4 tuning. + // Return address of a call-like instruction for a normal call or a + // jump-like instruction for a tail call. This is needed for + // GDB + DWARF 4 tuning. const MCSymbol *PCAddr = - ApplyGNUExtensions ? const_cast<MCSymbol*>(getLabelAfterInsn(&MI)) - : nullptr; + ApplyGNUExtensions + ? const_cast<MCSymbol *>(getLabelAfterInsn(TopLevelCallMI)) + : nullptr; assert((IsTail || PCOffset || PCAddr) && "Call without return PC information"); @@ -837,10 +880,13 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, // This CU is either a clang module DWO or a skeleton CU. NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, DIUnit->getDWOId()); - if (!DIUnit->getSplitDebugFilename().empty()) + if (!DIUnit->getSplitDebugFilename().empty()) { // This is a prefabricated skeleton CU. - NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, - DIUnit->getSplitDebugFilename()); + dwarf::Attribute attrDWOName = getDwarfVersion() >= 5 + ? dwarf::DW_AT_dwo_name + : dwarf::DW_AT_GNU_dwo_name; + NewCU.addString(Die, attrDWOName, DIUnit->getSplitDebugFilename()); + } } } // Create new DwarfCompileUnit for the given metadata node with tag @@ -878,11 +924,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); } - // Create DIEs for function declarations used for call site debug info. - for (auto Scope : DIUnit->getRetainedTypes()) - if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope)) - NewCU.getOrCreateSubprogramDIE(SP); - CUMap.insert({DIUnit, &NewCU}); CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); return NewCU; @@ -974,6 +1015,7 @@ void DwarfDebug::beginModule() { // Create the symbol that points to the first entry following the debug // address table (.debug_addr) header. AddrPool.setLabel(Asm->createTempSymbol("addr_table_base")); + DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base")); for (DICompileUnit *CUNode : M->debug_compile_units()) { // FIXME: Move local imported entities into a list attached to the @@ -1077,11 +1119,17 @@ void DwarfDebug::finalizeModuleInfo() { // If we're splitting the dwarf out now that we've got the entire // CU then add the dwo id to it. 
auto *SkCU = TheCU.getSkeleton(); - if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) { + + bool HasSplitUnit = SkCU && !TheCU.getUnitDie().children().empty(); + + if (HasSplitUnit) { + dwarf::Attribute attrDWOName = getDwarfVersion() >= 5 + ? dwarf::DW_AT_dwo_name + : dwarf::DW_AT_GNU_dwo_name; finishUnitAttributes(TheCU.getCUNode(), TheCU); - TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name, + TheCU.addString(TheCU.getUnitDie(), attrDWOName, Asm->TM.Options.MCOptions.SplitDwarfFile); - SkCU->addString(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_name, + SkCU->addString(SkCU->getUnitDie(), attrDWOName, Asm->TM.Options.MCOptions.SplitDwarfFile); // Emit a unique identifier for this CU. uint64_t ID = @@ -1127,29 +1175,34 @@ void DwarfDebug::finalizeModuleInfo() { // We don't keep track of which addresses are used in which CU so this // is a bit pessimistic under LTO. - if (!AddrPool.isEmpty() && - (getDwarfVersion() >= 5 || - (SkCU && !TheCU.getUnitDie().children().empty()))) + if ((!AddrPool.isEmpty() || TheCU.hasRangeLists()) && + (getDwarfVersion() >= 5 || HasSplitUnit)) U.addAddrTableBase(); if (getDwarfVersion() >= 5) { if (U.hasRangeLists()) U.addRnglistsBase(); - if (!DebugLocs.getLists().empty() && !useSplitDwarf()) { - DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base")); - U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base, - DebugLocs.getSym(), - TLOF.getDwarfLoclistsSection()->getBeginSymbol()); + if (!DebugLocs.getLists().empty()) { + if (!useSplitDwarf()) + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base, + DebugLocs.getSym(), + TLOF.getDwarfLoclistsSection()->getBeginSymbol()); } } auto *CUNode = cast<DICompileUnit>(P.first); // If compile Unit has macros, emit "DW_AT_macro_info" attribute. - if (CUNode->getMacros()) - U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info, - U.getMacroLabelBegin(), - TLOF.getDwarfMacinfoSection()->getBeginSymbol()); + if (CUNode->getMacros()) { + if (useSplitDwarf()) + TheCU.addSectionDelta(TheCU.getUnitDie(), dwarf::DW_AT_macro_info, + U.getMacroLabelBegin(), + TLOF.getDwarfMacinfoDWOSection()->getBeginSymbol()); + else + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info, + U.getMacroLabelBegin(), + TLOF.getDwarfMacinfoSection()->getBeginSymbol()); + } } // Emit all frontend-produced Skeleton CUs, i.e., Clang modules. @@ -1185,9 +1238,10 @@ void DwarfDebug::endModule() { emitDebugStr(); if (useSplitDwarf()) + // Emit debug_loc.dwo/debug_loclists.dwo section. emitDebugLocDWO(); else - // Emit info into a debug loc section. + // Emit debug_loc/debug_loclists section. emitDebugLoc(); // Corresponding abbreviations into a abbrev section. @@ -1203,8 +1257,12 @@ void DwarfDebug::endModule() { // Emit info into a debug ranges section. emitDebugRanges(); + if (useSplitDwarf()) + // Emit info into a debug macinfo.dwo section. + emitDebugMacinfoDWO(); + else // Emit info into a debug macinfo section. - emitDebugMacinfo(); + emitDebugMacinfo(); if (useSplitDwarf()) { emitDebugStrDWO(); @@ -2208,6 +2266,11 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) return; return DwarfExpr.addExpression(std::move(Cursor)); + } else if (Value.isTargetIndexLocation()) { + TargetIndexLocation Loc = Value.getTargetIndexLocation(); + // TODO TargetIndexLocation is a target-independent. Currently only the WebAssembly-specific + // encoding is supported. 
+ DwarfExpr.addWasmLocation(Loc.Index, Loc.Offset); } else if (Value.isConstantFP()) { APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt(); DwarfExpr.addUnsignedConstant(RawBytes); @@ -2242,6 +2305,8 @@ void DebugLocEntry::finalize(const AsmPrinter &AP, DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr); } DwarfExpr.finalize(); + if (DwarfExpr.TagOffset) + List.setTagOffset(*DwarfExpr.TagOffset); } void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, @@ -2296,7 +2361,7 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym()); for (const RangeSpanList &List : Holder.getRangeLists()) - Asm->EmitLabelDifference(List.getSym(), Holder.getRnglistsTableBaseSym(), + Asm->EmitLabelDifference(List.Label, Holder.getRnglistsTableBaseSym(), 4); return TableEnd; @@ -2313,12 +2378,13 @@ static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm, const auto &DebugLocs = DD.getDebugLocs(); - // FIXME: Generate the offsets table and use DW_FORM_loclistx with the - // DW_AT_loclists_base attribute. Until then set the number of offsets to 0. Asm->OutStreamer->AddComment("Offset entry count"); - Asm->emitInt32(0); + Asm->emitInt32(DebugLocs.getLists().size()); Asm->OutStreamer->EmitLabel(DebugLocs.getSym()); + for (const auto &List : DebugLocs.getLists()) + Asm->EmitLabelDifference(List.Label, DebugLocs.getSym(), 4); + return TableEnd; } @@ -2418,32 +2484,27 @@ static void emitRangeList( } } +// Handles emission of both debug_loclist / debug_loclist.dwo static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) { - emitRangeList( - DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), *List.CU, - dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair, - dwarf::DW_LLE_startx_length, dwarf::DW_LLE_end_of_list, - llvm::dwarf::LocListEncodingString, - /* ShouldUseBaseAddress */ true, - [&](const DebugLocStream::Entry &E) { - DD.emitDebugLocEntryLocation(E, List.CU); - }); + emitRangeList(DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), + *List.CU, dwarf::DW_LLE_base_addressx, + dwarf::DW_LLE_offset_pair, dwarf::DW_LLE_startx_length, + dwarf::DW_LLE_end_of_list, llvm::dwarf::LocListEncodingString, + /* ShouldUseBaseAddress */ true, + [&](const DebugLocStream::Entry &E) { + DD.emitDebugLocEntryLocation(E, List.CU); + }); } -// Emit locations into the .debug_loc/.debug_rnglists section. -void DwarfDebug::emitDebugLoc() { +void DwarfDebug::emitDebugLocImpl(MCSection *Sec) { if (DebugLocs.getLists().empty()) return; + Asm->OutStreamer->SwitchSection(Sec); + MCSymbol *TableEnd = nullptr; - if (getDwarfVersion() >= 5) { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfLoclistsSection()); + if (getDwarfVersion() >= 5) TableEnd = emitLoclistsTableHeader(Asm, *this); - } else { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfLocSection()); - } for (const auto &List : DebugLocs.getLists()) emitLocList(*this, Asm, List); @@ -2452,11 +2513,28 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer->EmitLabel(TableEnd); } +// Emit locations into the .debug_loc/.debug_loclists section. +void DwarfDebug::emitDebugLoc() { + emitDebugLocImpl( + getDwarfVersion() >= 5 + ? Asm->getObjFileLowering().getDwarfLoclistsSection() + : Asm->getObjFileLowering().getDwarfLocSection()); +} + +// Emit locations into the .debug_loc.dwo/.debug_loclists.dwo section. 
void DwarfDebug::emitDebugLocDWO() { + if (getDwarfVersion() >= 5) { + emitDebugLocImpl( + Asm->getObjFileLowering().getDwarfLoclistsDWOSection()); + + return; + } + for (const auto &List : DebugLocs.getLists()) { Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocDWOSection()); Asm->OutStreamer->EmitLabel(List.Label); + for (const auto &Entry : DebugLocs.getEntries(List)) { // GDB only supports startx_length in pre-standard split-DWARF. // (in v5 standard loclists, it currently* /only/ supports base_address + @@ -2468,8 +2546,9 @@ void DwarfDebug::emitDebugLocDWO() { Asm->emitInt8(dwarf::DW_LLE_startx_length); unsigned idx = AddrPool.getIndex(Entry.Begin); Asm->EmitULEB128(idx); + // Also the pre-standard encoding is slightly different, emitting this as + // an address-length entry here, but its a ULEB128 in DWARFv5 loclists. Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4); - emitDebugLocEntryLocation(Entry, List.CU); } Asm->emitInt8(dwarf::DW_LLE_end_of_list); @@ -2639,19 +2718,33 @@ void DwarfDebug::emitDebugARanges() { /// Emit a single range list. We handle both DWARF v5 and earlier. static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm, const RangeSpanList &List) { - emitRangeList(DD, Asm, List.getSym(), List.getRanges(), List.getCU(), + emitRangeList(DD, Asm, List.Label, List.Ranges, *List.CU, dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair, dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list, llvm::dwarf::RangeListEncodingString, - List.getCU().getCUNode()->getRangesBaseAddress() || + List.CU->getCUNode()->getRangesBaseAddress() || DD.getDwarfVersion() >= 5, [](auto) {}); } -static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm, - const DwarfFile &Holder, MCSymbol *TableEnd) { +void DwarfDebug::emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section) { + if (Holder.getRangeLists().empty()) + return; + + assert(useRangesSection()); + assert(!CUMap.empty()); + assert(llvm::any_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { + return !Pair.second->getCUNode()->isDebugDirectivesOnly(); + })); + + Asm->OutStreamer->SwitchSection(Section); + + MCSymbol *TableEnd = nullptr; + if (getDwarfVersion() >= 5) + TableEnd = emitRnglistsTableHeader(Asm, Holder); + for (const RangeSpanList &List : Holder.getRangeLists()) - emitRangeList(DD, Asm, List); + emitRangeList(*this, Asm, List); if (TableEnd) Asm->OutStreamer->EmitLabel(TableEnd); @@ -2660,55 +2753,17 @@ static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm, /// Emit address ranges into the .debug_ranges section or into the DWARF v5 /// .debug_rnglists section. void DwarfDebug::emitDebugRanges() { - if (CUMap.empty()) - return; - const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - if (Holder.getRangeLists().empty()) - return; - - assert(useRangesSection()); - assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { - return Pair.second->getCUNode()->isDebugDirectivesOnly(); - })); - - // Start the dwarf ranges section. - MCSymbol *TableEnd = nullptr; - if (getDwarfVersion() >= 5) { - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfRnglistsSection()); - TableEnd = emitRnglistsTableHeader(Asm, Holder); - } else - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfRangesSection()); - - emitDebugRangesImpl(*this, Asm, Holder, TableEnd); + emitDebugRangesImpl(Holder, + getDwarfVersion() >= 5 + ? 
Asm->getObjFileLowering().getDwarfRnglistsSection() + : Asm->getObjFileLowering().getDwarfRangesSection()); } void DwarfDebug::emitDebugRangesDWO() { - assert(useSplitDwarf()); - - if (CUMap.empty()) - return; - - const auto &Holder = InfoHolder; - - if (Holder.getRangeLists().empty()) - return; - - assert(getDwarfVersion() >= 5); - assert(useRangesSection()); - assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { - return Pair.second->getCUNode()->isDebugDirectivesOnly(); - })); - - // Start the dwarf ranges section. - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfRnglistsDWOSection()); - MCSymbol *TableEnd = emitRnglistsTableHeader(Asm, Holder); - - emitDebugRangesImpl(*this, Asm, Holder, TableEnd); + emitDebugRangesImpl(InfoHolder, + Asm->getObjFileLowering().getDwarfRnglistsDWOSection()); } void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { @@ -2745,35 +2800,30 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { Asm->EmitULEB128(dwarf::DW_MACINFO_end_file); } -/// Emit macros into a debug macinfo section. -void DwarfDebug::emitDebugMacinfo() { - if (CUMap.empty()) - return; - - if (llvm::all_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { - return Pair.second->getCUNode()->isDebugDirectivesOnly(); - })) - return; - - // Start the dwarf macinfo section. - Asm->OutStreamer->SwitchSection( - Asm->getObjFileLowering().getDwarfMacinfoSection()); - +void DwarfDebug::emitDebugMacinfoImpl(MCSection *Section) { for (const auto &P : CUMap) { auto &TheCU = *P.second; - if (TheCU.getCUNode()->isDebugDirectivesOnly()) - continue; auto *SkCU = TheCU.getSkeleton(); DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; auto *CUNode = cast<DICompileUnit>(P.first); DIMacroNodeArray Macros = CUNode->getMacros(); - if (!Macros.empty()) { - Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); - handleMacroNodes(Macros, U); - } + if (Macros.empty()) + continue; + Asm->OutStreamer->SwitchSection(Section); + Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); + handleMacroNodes(Macros, U); + Asm->OutStreamer->AddComment("End Of Macro List Mark"); + Asm->emitInt8(0); } - Asm->OutStreamer->AddComment("End Of Macro List Mark"); - Asm->emitInt8(0); +} + +/// Emit macros into a debug macinfo section. +void DwarfDebug::emitDebugMacinfo() { + emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoSection()); +} + +void DwarfDebug::emitDebugMacinfoDWO() { + emitDebugMacinfoImpl(Asm->getObjFileLowering().getDwarfMacinfoDWOSection()); } // DWARF5 Experimental Separate Dwarf emitters. @@ -2792,7 +2842,8 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { auto OwnedUnit = std::make_unique<DwarfCompileUnit>( - CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); + CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder, + UnitKind::Skeleton); DwarfCompileUnit &NewCU = *OwnedUnit; NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index c8c511f67c2a..fd82b1f98055 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -118,6 +118,9 @@ public: class DbgVariable : public DbgEntity { /// Offset in DebugLocs. unsigned DebugLocListIndex = ~0u; + /// DW_OP_LLVM_tag_offset value from DebugLocs. 
+ Optional<uint8_t> DebugLocListTagOffset; + /// Single value location description. std::unique_ptr<DbgValueLoc> ValueLoc = nullptr; @@ -174,6 +177,8 @@ public: void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } unsigned getDebugLocListIndex() const { return DebugLocListIndex; } + void setDebugLocListTagOffset(uint8_t O) { DebugLocListTagOffset = O; } + Optional<uint8_t> getDebugLocListTagOffset() const { return DebugLocListTagOffset; } StringRef getName() const { return getVariable()->getName(); } const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); } /// Get the FI entries, sorted by fragment offset. @@ -437,6 +442,9 @@ class DwarfDebug : public DebugHandlerBase { /// Construct a DIE for this abstract scope. void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); + /// Construct a DIE for the subprogram definition \p SP and return it. + DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP); + /// Construct DIEs for call site entries describing the calls in \p MF. void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, DIE &ScopeDIE, const MachineFunction &MF); @@ -498,15 +506,21 @@ class DwarfDebug : public DebugHandlerBase { /// Emit variable locations into a debug loc dwo section. void emitDebugLocDWO(); + void emitDebugLocImpl(MCSection *Sec); + /// Emit address ranges into a debug aranges section. void emitDebugARanges(); /// Emit address ranges into a debug ranges section. void emitDebugRanges(); void emitDebugRangesDWO(); + void emitDebugRangesImpl(const DwarfFile &Holder, MCSection *Section); /// Emit macros into a debug macinfo section. void emitDebugMacinfo(); + /// Emit macros into a debug macinfo.dwo section. + void emitDebugMacinfoDWO(); + void emitDebugMacinfoImpl(MCSection *Section); void emitMacro(DIMacro &M); void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U); void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 1c5a244d7c5d..310647f15a5e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -155,20 +155,18 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, CurSubReg.set(Offset, Offset + Size); // If this sub-register has a DWARF number and we haven't covered - // its range, emit a DWARF piece for it. - if (CurSubReg.test(Coverage)) { + // its range, and its range covers the value, emit a DWARF piece for it. + if (Offset < MaxSize && CurSubReg.test(Coverage)) { // Emit a piece for any gap in the coverage. if (Offset > CurPos) - DwarfRegs.push_back({-1, Offset - CurPos, "no DWARF register encoding"}); + DwarfRegs.push_back( + {-1, Offset - CurPos, "no DWARF register encoding"}); DwarfRegs.push_back( {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"}); - if (Offset >= MaxSize) - break; - - // Mark it as emitted. - Coverage.set(Offset, Offset + Size); - CurPos = Offset + Size; } + // Mark it as emitted. + Coverage.set(Offset, Offset + Size); + CurPos = Offset + Size; } // Failed to find any DWARF encoding. if (CurPos == 0) @@ -246,8 +244,8 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, // a call site parameter expression and if that expression is just a register // location, emit it with addBReg and offset 0, because we should emit a DWARF // expression representing a value, rather than a location. 
- if (!isMemoryLocation() && !HasComplexExpression && - (!isParameterValue() || isEntryValue())) { + if (!isMemoryLocation() && !HasComplexExpression && (!isParameterValue() || + isEntryValue())) { for (auto &Reg : DwarfRegs) { if (Reg.DwarfRegNo >= 0) addReg(Reg.DwarfRegNo, Reg.Comment); @@ -391,6 +389,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, // empty DW_OP_piece / DW_OP_bit_piece before we emitted the base // location. assert(OffsetInBits >= FragmentOffset && "fragment offset not added?"); + assert(SizeInBits >= OffsetInBits - FragmentOffset && "size underflow"); // If addMachineReg already emitted DW_OP_piece operations to represent // a super-register by splicing together sub-registers, subtract the size @@ -436,9 +435,6 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, break; case dwarf::DW_OP_deref: assert(!isRegisterLocation()); - // For more detailed explanation see llvm.org/PR43343. - assert(!isParameterValue() && "Parameter entry values should not be " - "dereferenced due to safety reasons."); if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor)) // Turning this into a memory location description makes the deref // implicit. @@ -576,3 +572,11 @@ void DwarfExpression::emitLegacyZExt(unsigned FromBits) { emitUnsigned((1ULL << FromBits) - 1); emitOp(dwarf::DW_OP_and); } + +void DwarfExpression::addWasmLocation(unsigned Index, int64_t Offset) { + assert(LocationKind == Implicit || LocationKind == Unknown); + LocationKind = Implicit; + emitOp(dwarf::DW_OP_WASM_location); + emitUnsigned(Index); + emitSigned(Offset); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 1ad46669f9b2..46c07b1d5b6b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -337,6 +337,10 @@ public: void emitLegacySExt(unsigned FromBits); void emitLegacyZExt(unsigned FromBits); + + /// Emit location information expressed via WebAssembly location + offset + /// The Index is an identifier for locals, globals or operand stack. + void addWasmLocation(unsigned Index, int64_t Offset); }; /// DwarfExpression implementation for .debug_loc entries. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp index e3c9095d1343..e5c4db58f477 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -126,6 +126,6 @@ void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) { std::pair<uint32_t, RangeSpanList *> DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) { CURangeLists.push_back( - RangeSpanList(Asm->createTempSymbol("debug_ranges"), CU, std::move(R))); + RangeSpanList{Asm->createTempSymbol("debug_ranges"), &CU, std::move(R)}); return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back()); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index 35fa51fb24c4..cf293d7534d0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -37,21 +37,12 @@ struct RangeSpan { const MCSymbol *End; }; -class RangeSpanList { -private: +struct RangeSpanList { // Index for locating within the debug_range section this particular span. - MCSymbol *RangeSym; + MCSymbol *Label; const DwarfCompileUnit *CU; // List of ranges. 
SmallVector<RangeSpan, 2> Ranges; - -public: - RangeSpanList(MCSymbol *Sym, const DwarfCompileUnit &CU, - SmallVector<RangeSpan, 2> Ranges) - : RangeSym(Sym), CU(&CU), Ranges(std::move(Ranges)) {} - MCSymbol *getSym() const { return RangeSym; } - const DwarfCompileUnit &getCU() const { return *CU; } - const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; } }; class DwarfFile { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 37c68c085792..1aba956c48de 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -188,8 +188,9 @@ int64_t DwarfUnit::getDefaultLowerBound() const { /// Check whether the DIE for this MDNode can be shared across CUs. bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { - // When the MDNode can be part of the type system, the DIE can be shared - // across CUs. + // When the MDNode can be part of the type system (this includes subprogram + // declarations *and* subprogram definitions, even local definitions), the + // DIE must be shared across CUs. // Combining type units and cross-CU DIE sharing is lower value (since // cross-CU DIE sharing is used in LTO and removes type redundancy at that // level already) but may be implementable for some value in projects @@ -197,9 +198,7 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { // together. if (isDwoUnit() && !DD->shareAcrossDWOCUs()) return false; - return (isa<DIType>(D) || - (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && - !DD->generateTypeUnits(); + return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits(); } DIE *DwarfUnit::getDIE(const DINode *D) const { @@ -800,6 +799,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { if (!Name.empty()) addString(Buffer, dwarf::DW_AT_name, Name); + // If alignment is specified for a typedef , create and insert DW_AT_alignment + // attribute in DW_TAG_typedef DIE. + if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) { + uint32_t AlignInBytes = DTy->getAlignInBytes(); + if (AlignInBytes > 0) + addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + } + // Add size if non-zero (derived types might be zero-sized.) 
if (Size && Tag != dwarf::DW_TAG_pointer_type && Tag != dwarf::DW_TAG_ptr_to_member_type @@ -1114,8 +1122,8 @@ DIE *DwarfUnit::getOrCreateModule(const DIModule *M) { M->getConfigurationMacros()); if (!M->getIncludePath().empty()) addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath()); - if (!M->getISysRoot().empty()) - addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot()); + if (!M->getSysRoot().empty()) + addString(MDie, dwarf::DW_AT_LLVM_sysroot, M->getSysRoot()); return &MDie; } @@ -1224,6 +1232,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); + if (SP->isObjCDirect()) + addFlag(SPDie, dwarf::DW_AT_APPLE_objc_direct); + unsigned CC = 0; DITypeRefArray Args; if (const DISubroutineType *SPTy = SP->getType()) { @@ -1307,6 +1318,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, addFlag(SPDie, dwarf::DW_AT_elemental); if (SP->isRecursive()) addFlag(SPDie, dwarf::DW_AT_recursive); + + if (DD->getDwarfVersion() >= 5 && SP->isDeleted()) + addFlag(SPDie, dwarf::DW_AT_deleted); } void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp index 290be81c6baa..914308d9147e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing Win64 exception info into asm files. +// This file contains support for writing the metadata for Windows Control Flow +// Guard, including address-taken functions, and valid longjmp targets. // //===----------------------------------------------------------------------===// @@ -17,6 +18,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Instructions.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCStreamer.h" @@ -29,16 +31,69 @@ WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {} WinCFGuard::~WinCFGuard() {} +void WinCFGuard::endFunction(const MachineFunction *MF) { + + // Skip functions without any longjmp targets. + if (MF->getLongjmpTargets().empty()) + return; + + // Copy the function's longjmp targets to a module-level list. + LongjmpTargets.insert(LongjmpTargets.end(), MF->getLongjmpTargets().begin(), + MF->getLongjmpTargets().end()); +} + +/// Returns true if this function's address is escaped in a way that might make +/// it an indirect call target. Function::hasAddressTaken gives different +/// results when a function is called directly with a function prototype +/// mismatch, which requires a cast. +static bool isPossibleIndirectCallTarget(const Function *F) { + SmallVector<const Value *, 4> Users{F}; + while (!Users.empty()) { + const Value *FnOrCast = Users.pop_back_val(); + for (const Use &U : FnOrCast->uses()) { + const User *FnUser = U.getUser(); + if (isa<BlockAddress>(FnUser)) + continue; + if (const auto *Call = dyn_cast<CallBase>(FnUser)) { + if (!Call->isCallee(&U)) + return true; + } else if (isa<Instruction>(FnUser)) { + // Consider any other instruction to be an escape. This has some weird + // consequences like no-op intrinsics being an escape or a store *to* a + // function address being an escape. 
+ return true; + } else if (const auto *C = dyn_cast<Constant>(FnUser)) { + // If this is a constant pointer cast of the function, don't consider + // this escape. Analyze the uses of the cast as well. This ensures that + // direct calls with mismatched prototypes don't end up in the CFG + // table. Consider other constants, such as vtable initializers, to + // escape the function. + if (C->stripPointerCasts() == F) + Users.push_back(FnUser); + else + return true; + } + } + } + return false; +} + void WinCFGuard::endModule() { const Module *M = Asm->MMI->getModule(); std::vector<const Function *> Functions; for (const Function &F : *M) - if (F.hasAddressTaken()) + if (isPossibleIndirectCallTarget(&F)) Functions.push_back(&F); - if (Functions.empty()) + if (Functions.empty() && LongjmpTargets.empty()) return; auto &OS = *Asm->OutStreamer; OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); for (const Function *F : Functions) OS.EmitCOFFSymbolIndex(Asm->getSymbol(F)); + + // Emit the symbol index of each longjmp target. + OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGLJMPSection()); + for (const MCSymbol *S : LongjmpTargets) { + OS.EmitCOFFSymbolIndex(S); + } } diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h index def0a59ab007..494a153b05ba 100644 --- a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing windows exception info into asm files. +// This file contains support for writing the metadata for Windows Control Flow +// Guard, including address-taken functions, and valid longjmp targets. // //===----------------------------------------------------------------------===// @@ -15,12 +16,14 @@ #include "llvm/CodeGen/AsmPrinterHandler.h" #include "llvm/Support/Compiler.h" +#include <vector> namespace llvm { class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler { /// Target of directive emission. AsmPrinter *Asm; + std::vector<const MCSymbol *> LongjmpTargets; public: WinCFGuard(AsmPrinter *A); @@ -28,7 +31,7 @@ public: void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} - /// Emit the Control Flow Guard function ID table + /// Emit the Control Flow Guard function ID table. void endModule() override; /// Gather pre-function debug information. @@ -39,7 +42,7 @@ public: /// Gather post-function debug information. /// Please note that some AsmPrinter implementations may not call /// beginFunction at all. - void endFunction(const MachineFunction *MF) override {} + void endFunction(const MachineFunction *MF) override; /// Process beginning of an instruction. 
void beginInstruction(const MachineInstr *MI) override {} diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index 27b298dcf6af..37a50cde6391 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" @@ -674,7 +675,7 @@ static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I, Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt"); Ret.Mask = Builder.CreateShl( - ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt, + ConstantInt::get(Ret.WordType, (1 << (ValueSize * 8)) - 1), Ret.ShiftAmt, "Mask"); Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask"); diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 455916eeb82f..4b9c50aeb1d3 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -38,6 +39,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -46,6 +48,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -102,6 +105,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -128,7 +132,8 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { BranchFolder::MBFIWrapper MBBFreqInfo( getAnalysis<MachineBlockFrequencyInfo>()); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, - getAnalysis<MachineBranchProbabilityInfo>()); + getAnalysis<MachineBranchProbabilityInfo>(), + &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI()); auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); return Folder.OptimizeFunction( MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(), @@ -138,9 +143,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, MBFIWrapper &FreqInfo, const MachineBranchProbabilityInfo &ProbInfo, + ProfileSummaryInfo *PSI, unsigned MinTailLength) : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength), - MBBFreqInfo(FreqInfo), MBPI(ProbInfo) { + MBBFreqInfo(FreqInfo), MBPI(ProbInfo), PSI(PSI) { if (MinCommonTailLength == 0) MinCommonTailLength = TailMergeSize; switch (FlagEnableTailMerge) { @@ -301,113 +307,56 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) { return HashMachineInstr(*I); } -/// Whether MI should be 
counted as an instruction when calculating common tail. +/// Whether MI should be counted as an instruction when calculating common tail. static bool countsAsInstruction(const MachineInstr &MI) { return !(MI.isDebugInstr() || MI.isCFIInstruction()); } -/// ComputeCommonTailLength - Given two machine basic blocks, compute the number -/// of instructions they actually have in common together at their end. Return -/// iterators for the first shared instruction in each block. +/// Iterate backwards from the given iterator \p I, towards the beginning of the +/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator +/// pointing to that MI. If no such MI is found, return the end iterator. +static MachineBasicBlock::iterator +skipBackwardPastNonInstructions(MachineBasicBlock::iterator I, + MachineBasicBlock *MBB) { + while (I != MBB->begin()) { + --I; + if (countsAsInstruction(*I)) + return I; + } + return MBB->end(); +} + +/// Given two machine basic blocks, return the number of instructions they +/// actually have in common together at their end. If a common tail is found (at +/// least by one instruction), then iterators for the first shared instruction +/// in each block are returned as well. +/// +/// Non-instructions according to countsAsInstruction are ignored. static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineBasicBlock::iterator &I1, MachineBasicBlock::iterator &I2) { - I1 = MBB1->end(); - I2 = MBB2->end(); + MachineBasicBlock::iterator MBBI1 = MBB1->end(); + MachineBasicBlock::iterator MBBI2 = MBB2->end(); unsigned TailLen = 0; - while (I1 != MBB1->begin() && I2 != MBB2->begin()) { - --I1; --I2; - // Skip debugging pseudos; necessary to avoid changing the code. - while (!countsAsInstruction(*I1)) { - if (I1==MBB1->begin()) { - while (!countsAsInstruction(*I2)) { - if (I2==MBB2->begin()) { - // I1==DBG at begin; I2==DBG at begin - goto SkipTopCFIAndReturn; - } - --I2; - } - ++I2; - // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin - goto SkipTopCFIAndReturn; - } - --I1; - } - // I1==first (untested) non-DBG preceding known match - while (!countsAsInstruction(*I2)) { - if (I2==MBB2->begin()) { - ++I1; - // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin - goto SkipTopCFIAndReturn; - } - --I2; - } - // I1, I2==first (untested) non-DBGs preceding known match - if (!I1->isIdenticalTo(*I2) || + while (true) { + MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1); + MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2); + if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end()) + break; + if (!MBBI1->isIdenticalTo(*MBBI2) || // FIXME: This check is dubious. It's used to get around a problem where // people incorrectly expect inline asm directives to remain in the same // relative order. This is untenable because normal compiler // optimizations (like this one) may reorder and/or merge these // directives. - I1->isInlineAsm()) { - ++I1; ++I2; + MBBI1->isInlineAsm()) { break; } ++TailLen; - } - // Back past possible debugging pseudos at beginning of block. This matters - // when one block differs from the other only by whether debugging pseudos - // are present at the beginning. (This way, the various checks later for - // I1==MBB1->begin() work as expected.) 
- if (I1 == MBB1->begin() && I2 != MBB2->begin()) { - --I2; - while (I2->isDebugInstr()) { - if (I2 == MBB2->begin()) - return TailLen; - --I2; - } - ++I2; - } - if (I2 == MBB2->begin() && I1 != MBB1->begin()) { - --I1; - while (I1->isDebugInstr()) { - if (I1 == MBB1->begin()) - return TailLen; - --I1; - } - ++I1; - } - -SkipTopCFIAndReturn: - // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if - // I1 and I2 are non-identical when compared and then one or both of them ends - // up pointing to a CFI instruction after being incremented. For example: - /* - BB1: - ... - INSTRUCTION_A - ADD32ri8 <- last common instruction - ... - BB2: - ... - INSTRUCTION_B - CFI_INSTRUCTION - ADD32ri8 <- last common instruction - ... - */ - // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after - // incrementing the iterators, I1 will point to ADD, however I2 will point to - // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the - // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI - // instruction. - while (I1 != MBB1->end() && I1->isCFIInstruction()) { - ++I1; - } - - while (I2 != MBB2->end() && I2->isCFIInstruction()) { - ++I2; + I1 = MBBI1; + I2 = MBBI2; } return TailLen; @@ -500,7 +449,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I, continue; if (I->isCall()) Time += 10; - else if (I->mayLoad() || I->mayStore()) + else if (I->mayLoadOrStore()) Time += 2; else ++Time; @@ -641,7 +590,9 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB, DenseMap<const MachineBasicBlock *, int> &EHScopeMembership, - bool AfterPlacement) { + bool AfterPlacement, + BranchFolder::MBFIWrapper &MBBFreqInfo, + ProfileSummaryInfo *PSI) { // It is never profitable to tail-merge blocks from two different EH scopes. if (!EHScopeMembership.empty()) { auto EHScope1 = EHScopeMembership.find(MBB1); @@ -659,6 +610,17 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, << " and " << printMBBReference(*MBB2) << " is " << CommonTailLen << '\n'); + // Move the iterators to the beginning of the MBB if we only got debug + // instructions before the tail. This is to avoid splitting a block when we + // only got debug instructions before the tail (to be invariant on -g). + if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1) + I1 = MBB1->begin(); + if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2) + I2 = MBB2->begin(); + + bool FullBlockTail1 = I1 == MBB1->begin(); + bool FullBlockTail2 = I2 == MBB2->begin(); + // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. // This is true only for a single successor. For multiple successors, we are @@ -677,7 +639,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // are unlikely to become a fallthrough target after machine block placement. // Tail merging these blocks is unlikely to create additional unconditional // branches, and will reduce the size of this cold code. 
- if (I1 == MBB1->begin() && I2 == MBB2->begin() && + if (FullBlockTail1 && FullBlockTail2 && blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2)) return true; @@ -685,16 +647,16 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // a position where the other could fall through into it, merge any number // of instructions, because it can be done without a branch. // TODO: If the blocks are not adjacent, move one of them so that they are? - if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin()) + if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2) return true; - if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) + if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1) return true; // If both blocks are identical and end in a branch, merge them unless they // both have a fallthrough predecessor and successor. // We can only do this after block placement because it depends on whether // there are fallthroughs, and we don't know until after layout. - if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) { + if (AfterPlacement && FullBlockTail1 && FullBlockTail2) { auto BothFallThrough = [](MachineBasicBlock *MBB) { if (MBB->succ_size() != 0 && !MBB->canFallThrough()) return false; @@ -727,8 +689,12 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); - return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() && - (I1 == MBB1->begin() || I2 == MBB2->begin()); + bool OptForSize = + MF->getFunction().hasOptSize() || + (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) && + llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI())); + return EffectiveTailLen >= 2 && OptForSize && + (FullBlockTail1 || FullBlockTail2); } unsigned BranchFolder::ComputeSameTails(unsigned CurHash, @@ -749,7 +715,7 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, CommonTailLen, TrialBBI1, TrialBBI2, SuccBB, PredBB, EHScopeMembership, - AfterBlockPlacement)) { + AfterBlockPlacement, MBBFreqInfo, PSI)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; @@ -869,7 +835,7 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos, assert(MBBICommon->isIdenticalTo(*MBBI) && "Expected matching MIIs!"); // Merge MMOs from memory operations in the common block. - if (MBBICommon->mayLoad() || MBBICommon->mayStore()) + if (MBBICommon->mayLoadOrStore()) MBBICommon->cloneMergedMemRefs(*MBB->getParent(), {&*MBBICommon, &*MBBI}); // Drop undef flags if they aren't present in all merged instructions. for (unsigned I = 0, E = MBBICommon->getNumOperands(); I != E; ++I) { @@ -1579,8 +1545,10 @@ ReoptimizeBlock: } } - if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && - MF.getFunction().hasOptSize()) { + bool OptForSize = + MF.getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI()); + if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) { // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch // direction, thereby defeating careful block placement and regressing // performance. Therefore, only consider this for optsize functions. 
diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h index 761ff9c7d54e..7a4c68ea09f5 100644 --- a/llvm/lib/CodeGen/BranchFolding.h +++ b/llvm/lib/CodeGen/BranchFolding.h @@ -27,6 +27,7 @@ class MachineFunction; class MachineLoopInfo; class MachineModuleInfo; class MachineRegisterInfo; +class ProfileSummaryInfo; class raw_ostream; class TargetInstrInfo; class TargetRegisterInfo; @@ -39,6 +40,7 @@ class TargetRegisterInfo; bool CommonHoist, MBFIWrapper &FreqInfo, const MachineBranchProbabilityInfo &ProbInfo, + ProfileSummaryInfo *PSI, // Min tail length to merge. Defaults to commandline // flag. Ignored for optsize. unsigned MinTailLength = 0); @@ -145,6 +147,7 @@ class TargetRegisterInfo; const BlockFrequency Freq) const; void view(const Twine &Name, bool isSimple = true); uint64_t getEntryFreq() const; + const MachineBlockFrequencyInfo &getMBFI() { return MBFI; } private: const MachineBlockFrequencyInfo &MBFI; @@ -154,6 +157,7 @@ class TargetRegisterInfo; private: MBFIWrapper &MBBFreqInfo; const MachineBranchProbabilityInfo &MBPI; + ProfileSummaryInfo *PSI; bool TailMergeBlocks(MachineFunction &MF); bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index 6efdc9efa968..f05517d178ae 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp index 709164e5f178..9bae9d36add1 100644 --- a/llvm/lib/CodeGen/BreakFalseDeps.cpp +++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp @@ -19,13 +19,13 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" - using namespace llvm; namespace llvm { diff --git a/llvm/lib/CodeGen/CFGuardLongjmp.cpp b/llvm/lib/CodeGen/CFGuardLongjmp.cpp new file mode 100644 index 000000000000..c3bf93855111 --- /dev/null +++ b/llvm/lib/CodeGen/CFGuardLongjmp.cpp @@ -0,0 +1,120 @@ +//===-- CFGuardLongjmp.cpp - Longjmp symbols for CFGuard --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains a machine function pass to insert a symbol after each +/// call to _setjmp and store this in the MachineFunction's LongjmpTargets +/// vector. This will be used to emit the table of valid longjmp targets used +/// by Control Flow Guard. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE "cfguard-longjmp" + +STATISTIC(CFGuardLongjmpTargets, + "Number of Control Flow Guard longjmp targets"); + +namespace { + +/// MachineFunction pass to insert a symbol after each call to _setjmp and store +/// this in the MachineFunction's LongjmpTargets vector. +class CFGuardLongjmp : public MachineFunctionPass { +public: + static char ID; + + CFGuardLongjmp() : MachineFunctionPass(ID) { + initializeCFGuardLongjmpPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "Control Flow Guard longjmp targets"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // end anonymous namespace + +char CFGuardLongjmp::ID = 0; + +INITIALIZE_PASS(CFGuardLongjmp, "CFGuardLongjmp", + "Insert symbols at valid longjmp targets for /guard:cf", false, + false) +FunctionPass *llvm::createCFGuardLongjmpPass() { return new CFGuardLongjmp(); } + +bool CFGuardLongjmp::runOnMachineFunction(MachineFunction &MF) { + + // Skip modules for which the cfguard flag is not set. + if (!MF.getMMI().getModule()->getModuleFlag("cfguard")) + return false; + + // Skip functions that do not have calls to _setjmp. + if (!MF.getFunction().callsFunctionThatReturnsTwice()) + return false; + + SmallVector<MachineInstr *, 8> SetjmpCalls; + + // Iterate over all instructions in the function and add calls to functions + // that return twice to the list of targets. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + + // Skip instructions that are not calls. + if (!MI.isCall() || MI.getNumOperands() < 1) + continue; + + // Iterate over operands to find calls to global functions. + for (MachineOperand &MO : MI.operands()) { + if (!MO.isGlobal()) + continue; + + auto *F = dyn_cast<Function>(MO.getGlobal()); + if (!F) + continue; + + // If the instruction calls a function that returns twice, add + // it to the list of targets. + if (F->hasFnAttribute(Attribute::ReturnsTwice)) { + SetjmpCalls.push_back(&MI); + break; + } + } + } + } + + if (SetjmpCalls.empty()) + return false; + + unsigned SetjmpNum = 0; + + // For each possible target, create a new symbol and insert it immediately + // after the call to setjmp. Add this symbol to the MachineFunction's list + // of longjmp targets. 
+ for (MachineInstr *Setjmp : SetjmpCalls) { + SmallString<128> SymbolName; + raw_svector_ostream(SymbolName) << "$cfgsj_" << MF.getName() << SetjmpNum++; + MCSymbol *SjSymbol = MF.getContext().getOrCreateSymbol(SymbolName); + + Setjmp->setPostInstrSymbol(MF, SjSymbol); + MF.addLongjmpTarget(SjSymbol); + CFGuardLongjmpTargets++; + } + + return true; +} diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp index 1a4d54231cfd..ef548c84d3c0 100644 --- a/llvm/lib/CodeGen/CFIInstrInserter.cpp +++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -48,8 +49,7 @@ class CFIInstrInserter : public MachineFunctionPass { } bool runOnMachineFunction(MachineFunction &MF) override { - if (!MF.getMMI().hasDebugInfo() && - !MF.getFunction().needsUnwindTableEntry()) + if (!MF.needsFrameMoves()) return false; MBBVector.resize(MF.getNumBlockIDs()); diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index ad9525f927e8..20fc67cc66ae 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -22,6 +22,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); + initializeCFGuardLongjmpPass(Registry); initializeCFIInstrInserterPass(Registry); initializeCodeGenPreparePass(Registry); initializeDeadMachineInstructionElimPass(Registry); @@ -104,6 +105,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeTailDuplicatePass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); + initializeTypePromotionPass(Registry); initializeUnpackMachineBundlesPass(Registry); initializeUnreachableBlockElimLegacyPassPass(Registry); initializeUnreachableMachineBlockElimPass(Registry); diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index fa4432ea23ec..f05afd058746 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -30,7 +30,6 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" @@ -61,6 +60,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" @@ -73,6 +74,7 @@ #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/BranchProbability.h" @@ -88,7 +90,9 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -222,6 +226,10 @@ static cl::opt<bool> 
cl::init(true), cl::desc("Enable splitting large offset of GEP.")); +static cl::opt<bool> EnableICMP_EQToICMP_ST( + "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), + cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion.")); + namespace { enum ExtType { @@ -251,6 +259,7 @@ class TypePromotionTransaction; const LoopInfo *LI; std::unique_ptr<BlockFrequencyInfo> BFI; std::unique_ptr<BranchProbabilityInfo> BPI; + ProfileSummaryInfo *PSI; /// As we scan instructions optimizing them, this is the next instruction /// to optimize. Transforms that can invalidate this should update it. @@ -293,7 +302,7 @@ class TypePromotionTransaction; /// Keep track of SExt promoted. ValueToSExts ValToSExtendedUses; - /// True if optimizing for size. + /// True if the function has the OptSize attribute. bool OptSize; /// DataLayout for the Function being processed. @@ -370,6 +379,7 @@ class TypePromotionTransaction; bool optimizeSwitchInst(SwitchInst *SI); bool optimizeExtractElementInst(Instruction *Inst); bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT); + bool fixupDbgValue(Instruction *I); bool placeDbgValues(Function &F); bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI, Instruction *&Inst, bool HasPromoted); @@ -429,10 +439,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); OptSize = F.hasOptSize(); - - ProfileSummaryInfo *PSI = - &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); if (ProfileGuidedSectionPrefix) { if (PSI->isFunctionHotInCallGraph(&F, *BFI)) F.setSectionPrefix(".hot"); @@ -451,7 +459,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // bypassSlowDivision may create new BBs, but we don't want to reapply the // optimization to those blocks. BasicBlock* Next = BB->getNextNode(); - EverMadeChange |= bypassSlowDivision(BB, BypassWidths); + // F.hasOptSize is already checked in the outer if statement. + if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) + EverMadeChange |= bypassSlowDivision(BB, BypassWidths); BB = Next; } } @@ -1049,7 +1059,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { // Collect all the relocate calls associated with a statepoint AllRelocateCalls.push_back(Relocate); - // We need atleast one base pointer relocation + one derived pointer + // We need at least one base pointer relocation + one derived pointer // relocation to mangle if (AllRelocateCalls.size() < 2) return false; @@ -1408,6 +1418,93 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { return MadeChange; } +/// For pattern like: +/// +/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB) +/// ... +/// DomBB: +/// ... +/// br DomCond, TrueBB, CmpBB +/// CmpBB: (with DomBB being the single predecessor) +/// ... +/// Cmp = icmp eq CmpOp0, CmpOp1 +/// ... +/// +/// It would use two comparison on targets that lowering of icmp sgt/slt is +/// different from lowering of icmp eq (PowerPC). This function try to convert +/// 'Cmp = icmp eq CmpOp0, CmpOp1' to ' Cmp = icmp slt/sgt CmpOp0, CmpOp1'. +/// After that, DomCond and Cmp can use the same comparison so reduce one +/// comparison. +/// +/// Return true if any changes are made. 
+static bool foldICmpWithDominatingICmp(CmpInst *Cmp, + const TargetLowering &TLI) { + if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp()) + return false; + + ICmpInst::Predicate Pred = Cmp->getPredicate(); + if (Pred != ICmpInst::ICMP_EQ) + return false; + + // If icmp eq has users other than BranchInst and SelectInst, converting it to + // icmp slt/sgt would introduce more redundant LLVM IR. + for (User *U : Cmp->users()) { + if (isa<BranchInst>(U)) + continue; + if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp) + continue; + return false; + } + + // This is a cheap/incomplete check for dominance - just match a single + // predecessor with a conditional branch. + BasicBlock *CmpBB = Cmp->getParent(); + BasicBlock *DomBB = CmpBB->getSinglePredecessor(); + if (!DomBB) + return false; + + // We want to ensure that the only way control gets to the comparison of + // interest is that a less/greater than comparison on the same operands is + // false. + Value *DomCond; + BasicBlock *TrueBB, *FalseBB; + if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB))) + return false; + if (CmpBB != FalseBB) + return false; + + Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1); + ICmpInst::Predicate DomPred; + if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1)))) + return false; + if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT) + return false; + + // Convert the equality comparison to the opposite of the dominating + // comparison and swap the direction for all branch/select users. + // We have conceptually converted: + // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>; + // to + // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>; + // And similarly for branches. + for (User *U : Cmp->users()) { + if (auto *BI = dyn_cast<BranchInst>(U)) { + assert(BI->isConditional() && "Must be conditional"); + BI->swapSuccessors(); + continue; + } + if (auto *SI = dyn_cast<SelectInst>(U)) { + // Swap operands + SI->swapValues(); + SI->swapProfMetadata(); + continue; + } + llvm_unreachable("Must be a branch or a select"); + } + Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred)); + return true; +} + bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) { if (sinkCmpExpression(Cmp, *TLI)) return true; @@ -1418,6 +1515,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) { if (combineToUSubWithOverflow(Cmp, ModifiedDT)) return true; + if (foldICmpWithDominatingICmp(Cmp, *TLI)) + return true; + return false; } @@ -1842,7 +1942,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { // cold block. This interacts with our handling for loads and stores to // ensure that we can fold all uses of a potential addressing computation // into their uses. TODO: generalize this to work over profiling data - if (!OptSize && CI->hasFnAttr(Attribute::Cold)) + bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get()); + if (!OptForSize && CI->hasFnAttr(Attribute::Cold)) for (auto &Arg : CI->arg_operands()) { if (!Arg->getType()->isPointerTy()) continue; @@ -1907,6 +2008,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { case Intrinsic::ctlz: // If counting zeros is expensive, try to avoid it. 
return despeculateCountZeros(II, TLI, DL, ModifiedDT); + case Intrinsic::dbg_value: + return fixupDbgValue(II); } if (TLI) { @@ -2777,16 +2880,24 @@ class AddressingModeMatcher { /// When true, IsProfitableToFoldIntoAddressingMode always returns true. bool IgnoreProfitability; + /// True if we are optimizing for size. + bool OptSize; + + ProfileSummaryInfo *PSI; + BlockFrequencyInfo *BFI; + AddressingModeMatcher( SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI, const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM, const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, - std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) + std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, + bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) : AddrModeInsts(AMI), TLI(TLI), TRI(TRI), DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), - PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) { + PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP), + OptSize(OptSize), PSI(PSI), BFI(BFI) { IgnoreProfitability = false; } @@ -2804,12 +2915,14 @@ public: const TargetLowering &TLI, const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, - std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) { + std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, + bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { ExtAddrMode Result; bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS, MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT, LargeOffsetGEP) + PromotedInsts, TPT, LargeOffsetGEP, + OptSize, PSI, BFI) .matchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); return Result; @@ -4420,7 +4533,8 @@ static bool FindAllMemoryUses( Instruction *I, SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI, - const TargetRegisterInfo &TRI, int SeenInsts = 0) { + const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI, int SeenInsts = 0) { // If we already considered this instruction, we're done. if (!ConsideredInsts.insert(I).second) return false; @@ -4429,8 +4543,6 @@ static bool FindAllMemoryUses( if (!MightBeFoldableInst(I)) return true; - const bool OptSize = I->getFunction()->hasOptSize(); - // Loop over all the uses, recursively processing them. for (Use &U : I->uses()) { // Conservatively return true if we're seeing a large number or a deep chain @@ -4471,7 +4583,9 @@ static bool FindAllMemoryUses( if (CallInst *CI = dyn_cast<CallInst>(UserI)) { // If this is a cold call, we can sink the addressing calculation into // the cold path. 
See optimizeCallInst - if (!OptSize && CI->hasFnAttr(Attribute::Cold)) + bool OptForSize = OptSize || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); + if (!OptForSize && CI->hasFnAttr(Attribute::Cold)) continue; InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); @@ -4483,8 +4597,8 @@ static bool FindAllMemoryUses( continue; } - if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, - SeenInsts)) + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, + PSI, BFI, SeenInsts)) return true; } @@ -4572,7 +4686,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // the use is just a particularly nice way of sinking it. SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; SmallPtrSet<Instruction*, 16> ConsideredInsts; - if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI)) + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, + PSI, BFI)) return false; // Has a non-memory, non-foldable use! // Now that we know that all uses of this instruction are part of a chain of @@ -4608,7 +4723,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, TPT.getRestorationPoint(); AddressingModeMatcher Matcher( MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result, - InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); + InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI); Matcher.IgnoreProfitability = true; bool Success = Matcher.matchAddr(Address, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -4714,7 +4829,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, 0); ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, - InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); + InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, + BFI.get()); GetElementPtrInst *GEP = LargeOffsetGEP.first; if (GEP && !NewGEPBases.count(GEP)) { @@ -5932,7 +6048,9 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { /// turn it into a branch. bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { // If branch conversion isn't desirable, exit early. - if (DisableSelectToBranch || OptSize || !TLI) + if (DisableSelectToBranch || + OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()) || + !TLI) return false; // Find all consecutive select instructions that share the same condition. @@ -7110,42 +7228,68 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { return MadeChange; } -// llvm.dbg.value is far away from the value then iSel may not be able -// handle it properly. iSel will drop llvm.dbg.value if it can not -// find a node corresponding to the value. +// Some CGP optimizations may move or alter what's computed in a block. Check +// whether a dbg.value intrinsic could be pointed at a more appropriate operand. +bool CodeGenPrepare::fixupDbgValue(Instruction *I) { + assert(isa<DbgValueInst>(I)); + DbgValueInst &DVI = *cast<DbgValueInst>(I); + + // Does this dbg.value refer to a sunk address calculation? + Value *Location = DVI.getVariableLocation(); + WeakTrackingVH SunkAddrVH = SunkAddrs[Location]; + Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; + if (SunkAddr) { + // Point dbg.value at locally computed address, which should give the best + // opportunity to be accurately lowered. 
This update may change the type of + // pointer being referred to; however this makes no difference to debugging + // information, and we can't generate bitcasts that may affect codegen. + DVI.setOperand(0, MetadataAsValue::get(DVI.getContext(), + ValueAsMetadata::get(SunkAddr))); + return true; + } + return false; +} + +// A llvm.dbg.value may be using a value before its definition, due to +// optimizations in this pass and others. Scan for such dbg.values, and rescue +// them by moving the dbg.value to immediately after the value definition. +// FIXME: Ideally this should never be necessary, and this has the potential +// to re-order dbg.value intrinsics. bool CodeGenPrepare::placeDbgValues(Function &F) { bool MadeChange = false; + DominatorTree DT(F); + for (BasicBlock &BB : F) { - Instruction *PrevNonDbgInst = nullptr; for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { Instruction *Insn = &*BI++; DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); - // Leave dbg.values that refer to an alloca alone. These - // intrinsics describe the address of a variable (= the alloca) - // being taken. They should not be moved next to the alloca - // (and to the beginning of the scope), but rather stay close to - // where said address is used. - if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) { - PrevNonDbgInst = Insn; + if (!DVI) continue; - } Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); - if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { - // If VI is a phi in a block with an EHPad terminator, we can't insert - // after it. - if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) - continue; - LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" - << *DVI << ' ' << *VI); - DVI->removeFromParent(); - if (isa<PHINode>(VI)) - DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); - else - DVI->insertAfter(VI); - MadeChange = true; - ++NumDbgValueMoved; - } + + if (!VI || VI->isTerminator()) + continue; + + // If VI is a phi in a block with an EHPad terminator, we can't insert + // after it. + if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) + continue; + + // If the defining instruction dominates the dbg.value, we do not need + // to move the dbg.value. + if (DT.dominates(VI, DVI)) + continue; + + LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" + << *DVI << ' ' << *VI); + DVI->removeFromParent(); + if (isa<PHINode>(VI)) + DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); + else + DVI->insertAfter(VI); + MadeChange = true; + ++NumDbgValueMoved; } } return MadeChange; @@ -7201,6 +7345,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { if (Br1->getMetadata(LLVMContext::MD_unpredictable)) continue; + // The merging of mostly empty BB can cause a degenerate branch. 
+ if (TBB == FBB) + continue; + unsigned Opc; Value *Cond1, *Cond2; if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)), diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 702e7e244bce..8d9d48402b31 100644 --- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -261,15 +261,25 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isRegMask()) - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - if (MO.clobbersPhysReg(i)) { + if (MO.isRegMask()) { + auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) { + for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI) + if (!MO.clobbersPhysReg(*SRI)) + return false; + + return true; + }; + + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + if (ClobbersPhysRegAndSubRegs(i)) { DefIndices[i] = Count; KillIndices[i] = ~0u; KeepRegs.reset(i); Classes[i] = nullptr; RegRefs.erase(i); } + } + } if (!MO.isReg()) continue; Register Reg = MO.getReg(); diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp index a169c3cb16b2..afcf014bca40 100644 --- a/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -52,68 +52,22 @@ static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden, static unsigned InstrCount = 0; -// -------------------------------------------------------------------- -// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp - -static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { - return (Inp << DFA_MAX_RESOURCES) | FuncUnits; -} - -/// Return the DFAInput for an instruction class input vector. -/// This function is used in both DFAPacketizer.cpp and in -/// DFAPacketizerEmitter.cpp. -static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) { - DFAInput InsnInput = 0; - assert((InsnClass.size() <= DFA_MAX_RESTERMS) && - "Exceeded maximum number of DFA terms"); - for (auto U : InsnClass) - InsnInput = addDFAFuncUnits(InsnInput, U); - return InsnInput; -} - -// -------------------------------------------------------------------- - -// Make sure DFA types are large enough for the number of terms & resources. -static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= - (8 * sizeof(DFAInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); -static_assert( - (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); - -// Return the DFAInput for an instruction class. -DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { - // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. - DFAInput InsnInput = 0; - unsigned i = 0; - (void)i; - for (const InstrStage *IS = InstrItins->beginStage(InsnClass), - *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) { - InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); - assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); - } - return InsnInput; -} - -// Return the DFAInput for an instruction class input vector. -DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) { - return getDFAInsnInput(InsnClass); -} - // Check if the resources occupied by a MCInstrDesc are available in the // current state. 
bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { - unsigned InsnClass = MID->getSchedClass(); - DFAInput InsnInput = getInsnInput(InsnClass); - return A.canAdd(InsnInput); + unsigned Action = ItinActions[MID->getSchedClass()]; + if (MID->getSchedClass() == 0 || Action == 0) + return false; + return A.canAdd(Action); } // Reserve the resources occupied by a MCInstrDesc and change the current // state to reflect that change. void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { - unsigned InsnClass = MID->getSchedClass(); - DFAInput InsnInput = getInsnInput(InsnClass); - A.add(InsnInput); + unsigned Action = ItinActions[MID->getSchedClass()]; + if (MID->getSchedClass() == 0 || Action == 0) + return; + A.add(Action); } // Check if the resources occupied by a machine instruction are available diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 9a537c859a67..d1529b08f708 100644 --- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -81,6 +82,15 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) return false; } else { + if (MO.isDead()) { +#ifndef NDEBUG + // Sanity check on uses of this dead register. All of them should be + // 'undef'. + for (auto &U : MRI->use_nodbg_operands(Reg)) + assert(U.isUndef() && "'Undef' use on a 'dead' register is found!"); +#endif + continue; + } for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) { if (&Use != MI) // This def has a non-debug use. Don't delete the instruction! 
diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp index ddd6cec5a178..af347fd7e73d 100644 --- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -17,7 +17,6 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -30,9 +29,11 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Local.h" #include <cstddef> using namespace llvm; diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp index e5694218b5c3..d45e424184d7 100644 --- a/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -940,6 +941,7 @@ class EarlyIfPredicator : public MachineFunctionPass { TargetSchedModel SchedModel; MachineRegisterInfo *MRI; MachineDominatorTree *DomTree; + MachineBranchProbabilityInfo *MBPI; MachineLoopInfo *Loops; SSAIfConv IfConv; @@ -965,10 +967,12 @@ char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID; INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false, false) void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); @@ -978,6 +982,7 @@ void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const { /// Apply the target heuristic to decide if the transformation is profitable. bool EarlyIfPredicator::shouldConvertIf() { + auto TrueProbability = MBPI->getEdgeProbability(IfConv.Head, IfConv.TBB); if (IfConv.isTriangle()) { MachineBasicBlock &IfBlock = (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB; @@ -992,7 +997,7 @@ bool EarlyIfPredicator::shouldConvertIf() { } return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost, - BranchProbability::getUnknown()); + TrueProbability); } unsigned TExtra = 0; unsigned FExtra = 0; @@ -1011,8 +1016,7 @@ bool EarlyIfPredicator::shouldConvertIf() { FExtra += TII->getPredicationCost(I); } return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB, - FCycle, FExtra, - BranchProbability::getUnknown()); + FCycle, FExtra, TrueProbability); } /// Attempt repeated if-conversion on MBB, return true if successful. 
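The hunk above replaces BranchProbability::getUnknown() with the real probability of the Head -> TBB edge, taken from the newly required MachineBranchProbabilityInfo analysis. A minimal sketch of how that analysis is typically queried and compared against a threshold is shown below; it is illustrative only, the helper name isTrueEdgeLikely is hypothetical, and it is not part of this patch.

// Illustrative sketch, not part of the patch: query the probability of the
// Head -> TBB edge and compare it against a 50% threshold.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/Support/BranchProbability.h"
using namespace llvm;

static bool isTrueEdgeLikely(const MachineBranchProbabilityInfo &MBPI,
                             const MachineBasicBlock *Head,
                             const MachineBasicBlock *TBB) {
  // getEdgeProbability returns the weight of the Head -> TBB edge as a fraction.
  BranchProbability TrueProb = MBPI.getEdgeProbability(Head, TBB);
  // Treat the edge as likely if it is taken at least half of the time.
  return TrueProb >= BranchProbability(1, 2);
}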
@@ -1043,6 +1047,7 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) { SchedModel.init(&STI); DomTree = &getAnalysis<MachineDominatorTree>(); Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); bool Changed = false; IfConv.runOnMachineFunction(MF); diff --git a/llvm/lib/CodeGen/EdgeBundles.cpp b/llvm/lib/CodeGen/EdgeBundles.cpp index 486720cadd27..dfaf7f584652 100644 --- a/llvm/lib/CodeGen/EdgeBundles.cpp +++ b/llvm/lib/CodeGen/EdgeBundles.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp index 9916f2de0414..a1adf4ef9820 100644 --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -13,6 +13,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -20,6 +22,8 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/SizeOpts.h" using namespace llvm; @@ -264,9 +268,9 @@ Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source, uint64_t OffsetBytes) { if (OffsetBytes > 0) { auto *ByteType = Type::getInt8Ty(CI->getContext()); - Source = Builder.CreateGEP( + Source = Builder.CreateConstGEP1_64( ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()), - ConstantInt::get(ByteType, OffsetBytes)); + OffsetBytes); } return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo()); } @@ -720,7 +724,8 @@ Value *MemCmpExpansion::getMemCmpExpansion() { /// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ] /// ret i32 %phi.res static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, - const TargetLowering *TLI, const DataLayout *DL) { + const TargetLowering *TLI, const DataLayout *DL, + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { NumMemCmpCalls++; // Early exit from expansion if -Oz. @@ -741,18 +746,20 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, // TTI call to check if target would like to expand memcmp. Also, get the // available load sizes. 
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); - auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(), + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); + auto Options = TTI->enableMemCmpExpansion(OptForSize, IsUsedForZeroCmp); if (!Options) return false; if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()) Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock; - if (CI->getFunction()->hasOptSize() && + if (OptForSize && MaxLoadsPerMemcmpOptSize.getNumOccurrences()) Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize; - if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences()) + if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences()) Options.MaxNumLoads = MaxLoadsPerMemcmp; MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL); @@ -798,7 +805,11 @@ public: &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - auto PA = runImpl(F, TLI, TTI, TL); + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : + nullptr; + auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI); return !PA.areAllPreserved(); } @@ -806,22 +817,26 @@ private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); FunctionPass::getAnalysisUsage(AU); } PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const TargetLowering* TL); + const TargetLowering* TL, + ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI); // Returns true if a change was made. bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const TargetLowering* TL, - const DataLayout& DL); + const DataLayout& DL, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI); }; bool ExpandMemCmpPass::runOnBlock( BasicBlock &BB, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const TargetLowering* TL, - const DataLayout& DL) { + const DataLayout& DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { for (Instruction& I : BB) { CallInst *CI = dyn_cast<CallInst>(&I); if (!CI) { @@ -830,7 +845,7 @@ bool ExpandMemCmpPass::runOnBlock( LibFunc Func; if (TLI->getLibFunc(ImmutableCallSite(CI), Func) && (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && - expandMemCmp(CI, TTI, TL, &DL)) { + expandMemCmp(CI, TTI, TL, &DL, PSI, BFI)) { return true; } } @@ -840,11 +855,12 @@ bool ExpandMemCmpPass::runOnBlock( PreservedAnalyses ExpandMemCmpPass::runImpl( Function &F, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const TargetLowering* TL) { + const TargetLowering* TL, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { const DataLayout& DL = F.getParent()->getDataLayout(); bool MadeChanges = false; for (auto BBIt = F.begin(); BBIt != F.end();) { - if (runOnBlock(*BBIt, TLI, TTI, TL, DL)) { + if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI)) { MadeChanges = true; // If changes were made, restart the function from the beginning, since // the structure of the function was changed. 
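The hunk above makes memcmp expansion size-aware using profile data: a call is treated as size-optimized when its function carries the optsize attribute or when the profile-guided query llvm::shouldOptimizeForSize() returns true for the block containing the call. A minimal sketch of that combined check, assuming the SizeOpts API used in the diff, follows; the helper name optimizeCallForSize is hypothetical and the snippet is not part of this patch.

// Illustrative sketch, not part of the patch: the profile-aware
// "optimize for size" decision applied to a memcmp call site.
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
using namespace llvm;

static bool optimizeCallForSize(const CallInst *CI, ProfileSummaryInfo *PSI,
                                BlockFrequencyInfo *BFI) {
  // Respect the function-level attribute first, then fall back to the
  // profile-based per-block query.
  return CI->getFunction()->hasOptSize() ||
         shouldOptimizeForSize(CI->getParent(), PSI, BFI);
}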
@@ -863,6 +879,8 @@ INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp", "Expand memcmp() to load/stores", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp", "Expand memcmp() to load/stores", false, false) diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 1fc57fac1489..842211c09134 100644 --- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp index 1069a2423b8b..4ccf1d2c8c50 100644 --- a/llvm/lib/CodeGen/ExpandReductions.cpp +++ b/llvm/lib/CodeGen/ExpandReductions.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/LoopUtils.h" @@ -78,14 +79,32 @@ RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { bool expandReductions(Function &F, const TargetTransformInfo *TTI) { bool Changed = false; SmallVector<IntrinsicInst *, 4> Worklist; - for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - if (auto II = dyn_cast<IntrinsicInst>(&*I)) - Worklist.push_back(II); + for (auto &I : instructions(F)) { + if (auto *II = dyn_cast<IntrinsicInst>(&I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: + case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::experimental_vector_reduce_fmax: + case Intrinsic::experimental_vector_reduce_fmin: + if (TTI->shouldExpandReduction(II)) + Worklist.push_back(II); - for (auto *II : Worklist) { - if (!TTI->shouldExpandReduction(II)) - continue; + break; + } + } + } + for (auto *II : Worklist) { FastMathFlags FMF = isa<FPMathOperator>(II) ? 
II->getFastMathFlags() : FastMathFlags{}; Intrinsic::ID ID = II->getIntrinsicID(); @@ -96,6 +115,7 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.setFastMathFlags(FMF); switch (ID) { + default: llvm_unreachable("Unexpected intrinsic!"); case Intrinsic::experimental_vector_reduce_v2_fadd: case Intrinsic::experimental_vector_reduce_v2_fmul: { // FMFs must be attached to the call, otherwise it's an ordered reduction @@ -105,11 +125,15 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { if (!FMF.allowReassoc()) Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK); else { + if (!isPowerOf2_32(Vec->getType()->getVectorNumElements())) + continue; + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), Acc, Rdx, "bin.rdx"); } - } break; + break; + } case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_and: @@ -122,10 +146,12 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) { case Intrinsic::experimental_vector_reduce_fmax: case Intrinsic::experimental_vector_reduce_fmin: { Value *Vec = II->getArgOperand(0); + if (!isPowerOf2_32(Vec->getType()->getVectorNumElements())) + continue; + Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); - } break; - default: - continue; + break; + } } II->replaceAllUsesWith(Rdx); II->eraseFromParent(); diff --git a/llvm/lib/CodeGen/FEntryInserter.cpp b/llvm/lib/CodeGen/FEntryInserter.cpp index a122f490884e..4c0f30bce820 100644 --- a/llvm/lib/CodeGen/FEntryInserter.cpp +++ b/llvm/lib/CodeGen/FEntryInserter.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/FaultMaps.cpp b/llvm/lib/CodeGen/FaultMaps.cpp index 600f72d320eb..de0b4fa87098 100644 --- a/llvm/lib/CodeGen/FaultMaps.cpp +++ b/llvm/lib/CodeGen/FaultMaps.cpp @@ -28,11 +28,9 @@ const char *FaultMaps::WFMP = "Fault Maps: "; FaultMaps::FaultMaps(AsmPrinter &AP) : AP(AP) {} void FaultMaps::recordFaultingOp(FaultKind FaultTy, + const MCSymbol *FaultingLabel, const MCSymbol *HandlerLabel) { MCContext &OutContext = AP.OutStreamer->getContext(); - MCSymbol *FaultingLabel = OutContext.createTempSymbol(); - - AP.OutStreamer->EmitLabel(FaultingLabel); const MCExpr *FaultingOffset = MCBinaryExpr::createSub( MCSymbolRefExpr::create(FaultingLabel, OutContext), diff --git a/llvm/lib/CodeGen/FinalizeISel.cpp b/llvm/lib/CodeGen/FinalizeISel.cpp index 772d7f71bb37..00040e92a829 100644 --- a/llvm/lib/CodeGen/FinalizeISel.cpp +++ b/llvm/lib/CodeGen/FinalizeISel.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/FuncletLayout.cpp b/llvm/lib/CodeGen/FuncletLayout.cpp index 75f6d0b8f0bf..f1222a88b054 100644 --- a/llvm/lib/CodeGen/FuncletLayout.cpp +++ b/llvm/lib/CodeGen/FuncletLayout.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" using namespace llvm; #define DEBUG_TYPE "funclet-layout" diff --git a/llvm/lib/CodeGen/GCMetadata.cpp 
b/llvm/lib/CodeGen/GCMetadata.cpp index c1d22ef89195..600d662e0f99 100644 --- a/llvm/lib/CodeGen/GCMetadata.cpp +++ b/llvm/lib/CodeGen/GCMetadata.cpp @@ -10,11 +10,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp index 0dc0a5bce747..90e5f32f53b3 100644 --- a/llvm/lib/CodeGen/GCRootLowering.cpp +++ b/llvm/lib/CodeGen/GCRootLowering.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index 7d9d812d34bc..e6abfcdb92cb 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -10,11 +10,16 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/InitializePasses.h" #define DEBUG_TYPE "cseinfo" using namespace llvm; char llvm::GISelCSEAnalysisWrapperPass::ID = 0; +GISelCSEAnalysisWrapperPass::GISelCSEAnalysisWrapperPass() + : MachineFunctionPass(ID) { + initializeGISelCSEAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); +} INITIALIZE_PASS_BEGIN(GISelCSEAnalysisWrapperPass, DEBUG_TYPE, "Analysis containing CSE Info", false, true) INITIALIZE_PASS_END(GISelCSEAnalysisWrapperPass, DEBUG_TYPE, @@ -52,7 +57,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_ANYEXT: case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: - case TargetOpcode::G_GEP: + case TargetOpcode::G_PTR_ADD: return true; } return false; diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index cdad92f7db4f..4c2dbdd905f3 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -65,7 +65,11 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, Info.SwiftErrorVReg = SwiftErrorVReg; Info.IsMustTailCall = CS.isMustTailCall(); Info.IsTailCall = CS.isTailCall() && - isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()); + isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()) && + (MIRBuilder.getMF() + .getFunction() + .getFnAttribute("disable-tail-calls") + .getValueAsString() != "true"); Info.IsVarArg = CS.getFunctionType()->isVarArg(); return lowerCall(MIRBuilder, Info); } diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 854769d283f7..a103e8e4e6e0 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -74,12 +74,35 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { return false; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); + + // Give up if either DstReg or SrcReg is a physical register. 
+ if (Register::isPhysicalRegister(DstReg) || + Register::isPhysicalRegister(SrcReg)) + return false; + + // Give up the types don't match. LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(SrcReg); - // Simple Copy Propagation. - // a(sx) = COPY b(sx) -> Replace all uses of a with b. - if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) + // Give up if one has a valid LLT, but the other doesn't. + if (DstTy.isValid() != SrcTy.isValid()) + return false; + // Give up if the types don't match. + if (DstTy.isValid() && SrcTy.isValid() && DstTy != SrcTy) + return false; + + // Get the register banks and classes. + const RegisterBank *DstBank = MRI.getRegBankOrNull(DstReg); + const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg); + const TargetRegisterClass *DstRC = MRI.getRegClassOrNull(DstReg); + const TargetRegisterClass *SrcRC = MRI.getRegClassOrNull(SrcReg); + + // Replace if the register constraints match. + if ((SrcRC == DstRC) && (SrcBank == DstBank)) + return true; + // Replace if DstReg has no constraints. + if (!DstBank && !DstRC) return true; + return false; } void CombinerHelper::applyCombineCopy(MachineInstr &MI) { @@ -109,10 +132,7 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, // Walk over all the operands of concat vectors and check if they are // build_vector themselves or undef. // Then collect their operands in Ops. - for (const MachineOperand &MO : MI.operands()) { - // Skip the instruction definition. - if (MO.isDef()) - continue; + for (const MachineOperand &MO : MI.uses()) { Register Reg = MO.getReg(); MachineInstr *Def = MRI.getVRegDef(Reg); assert(Def && "Operand not defined"); @@ -121,12 +141,8 @@ bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, IsUndef = false; // Remember the operands of the build_vector to fold // them into the yet-to-build flattened concat vectors. - for (const MachineOperand &BuildVecMO : Def->operands()) { - // Skip the definition. - if (BuildVecMO.isDef()) - continue; + for (const MachineOperand &BuildVecMO : Def->uses()) Ops.push_back(BuildVecMO.getReg()); - } break; case TargetOpcode::G_IMPLICIT_DEF: { LLT OpType = MRI.getType(Reg); @@ -189,8 +205,11 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, LLT DstType = MRI.getType(MI.getOperand(0).getReg()); Register Src1 = MI.getOperand(1).getReg(); LLT SrcType = MRI.getType(Src1); - unsigned DstNumElts = DstType.getNumElements(); - unsigned SrcNumElts = SrcType.getNumElements(); + // As bizarre as it may look, shuffle vector can actually produce + // scalar! This is because at the IR level a <1 x ty> shuffle + // vector is perfectly valid. + unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1; + unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1; // If the resulting vector is smaller than the size of the source // vectors being concatenated, we won't be able to replace the @@ -199,7 +218,15 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, // Note: We may still be able to produce a concat_vectors fed by // extract_vector_elt and so on. It is less clear that would // be better though, so don't bother for now. - if (DstNumElts < 2 * SrcNumElts) + // + // If the destination is a scalar, the size of the sources doesn't + // matter. we will lower the shuffle to a plain copy. This will + // work only if the source and destination have the same size. But + // that's covered by the next condition. 
+ // + // TODO: If the size between the source and destination don't match + // we could still emit an extract vector element in that case. + if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1) return false; // Check that the shuffle mask can be broken evenly between the @@ -212,8 +239,7 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, // vectors. unsigned NumConcat = DstNumElts / SrcNumElts; SmallVector<int, 8> ConcatSrcs(NumConcat, -1); - SmallVector<int, 8> Mask; - ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); for (unsigned i = 0; i != DstNumElts; ++i) { int Idx = Mask[i]; // Undef value. @@ -254,7 +280,10 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, Builder.setInsertPt(*MI.getParent(), MI); Register NewDstReg = MRI.cloneVirtualRegister(DstReg); - Builder.buildConcatVectors(NewDstReg, Ops); + if (Ops.size() == 1) + Builder.buildCopy(NewDstReg, Ops[0]); + else + Builder.buildMerge(NewDstReg, Ops); MI.eraseFromParent(); replaceRegWith(MRI, DstReg, NewDstReg); @@ -571,7 +600,7 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); for (auto &Use : MRI.use_instructions(Base)) { - if (Use.getOpcode() != TargetOpcode::G_GEP) + if (Use.getOpcode() != TargetOpcode::G_PTR_ADD) continue; Offset = Use.getOperand(2).getReg(); @@ -597,8 +626,8 @@ bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, // forming an indexed one. bool MemOpDominatesAddrUses = true; - for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) { - if (!dominates(MI, GEPUse)) { + for (auto &PtrAddUse : MRI.use_instructions(Use.getOperand(0).getReg())) { + if (!dominates(MI, PtrAddUse)) { MemOpDominatesAddrUses = false; break; } @@ -631,7 +660,7 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, #endif Addr = MI.getOperand(1).getReg(); - MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI); + MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI); if (!AddrDef || MRI.hasOneUse(Addr)) return false; @@ -667,8 +696,8 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, } } - // FIXME: check whether all uses of the base pointer are constant GEPs. That - // might allow us to end base's liveness here by adjusting the constant. + // FIXME: check whether all uses of the base pointer are constant PtrAdds. + // That might allow us to end base's liveness here by adjusting the constant. 
for (auto &UseMI : MRI.use_instructions(Addr)) { if (!dominates(MI, UseMI)) { @@ -681,18 +710,36 @@ bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, } bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { + IndexedLoadStoreMatchInfo MatchInfo; + if (matchCombineIndexedLoadStore(MI, MatchInfo)) { + applyCombineIndexedLoadStore(MI, MatchInfo); + return true; + } + return false; +} + +bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { unsigned Opcode = MI.getOpcode(); if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD && Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) return false; - bool IsStore = Opcode == TargetOpcode::G_STORE; - Register Addr, Base, Offset; - bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset); - if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset)) + MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, + MatchInfo.Offset); + if (!MatchInfo.IsPre && + !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base, + MatchInfo.Offset)) return false; + return true; +} +void CombinerHelper::applyCombineIndexedLoadStore( + MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) { + MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr); + MachineIRBuilder MIRBuilder(MI); + unsigned Opcode = MI.getOpcode(); + bool IsStore = Opcode == TargetOpcode::G_STORE; unsigned NewOpcode; switch (Opcode) { case TargetOpcode::G_LOAD: @@ -711,25 +758,22 @@ bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { llvm_unreachable("Unknown load/store opcode"); } - MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr); - MachineIRBuilder MIRBuilder(MI); auto MIB = MIRBuilder.buildInstr(NewOpcode); if (IsStore) { - MIB.addDef(Addr); + MIB.addDef(MatchInfo.Addr); MIB.addUse(MI.getOperand(0).getReg()); } else { MIB.addDef(MI.getOperand(0).getReg()); - MIB.addDef(Addr); + MIB.addDef(MatchInfo.Addr); } - MIB.addUse(Base); - MIB.addUse(Offset); - MIB.addImm(IsPre); + MIB.addUse(MatchInfo.Base); + MIB.addUse(MatchInfo.Offset); + MIB.addImm(MatchInfo.IsPre); MI.eraseFromParent(); AddrDef.eraseFromParent(); LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); - return true; } bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { @@ -1016,7 +1060,7 @@ bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val if (DstOff != 0) { auto Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); - Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); } MIB.buildStore(Value, Ptr, *StoreMMO); @@ -1121,13 +1165,13 @@ bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, if (CurrOffset != 0) { Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) .getReg(0); - LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); } auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); // Create the store. Register StorePtr = - CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + CurrOffset == 0 ? 
Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); MIB.buildStore(LdVal, StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); Size -= CopyTy.getSizeInBytes(); @@ -1218,7 +1262,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, if (CurrOffset != 0) { auto Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0); } LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); CurrOffset += CopyTy.getSizeInBytes(); @@ -1235,7 +1279,7 @@ bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, if (CurrOffset != 0) { auto Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); - StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0); } MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); CurrOffset += CopyTy.getSizeInBytes(); @@ -1295,6 +1339,52 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { return false; } +bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI, + PtrAddChain &MatchInfo) { + // We're trying to match the following pattern: + // %t1 = G_PTR_ADD %base, G_CONSTANT imm1 + // %root = G_PTR_ADD %t1, G_CONSTANT imm2 + // --> + // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2) + + if (MI.getOpcode() != TargetOpcode::G_PTR_ADD) + return false; + + Register Add2 = MI.getOperand(1).getReg(); + Register Imm1 = MI.getOperand(2).getReg(); + auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI); + if (!MaybeImmVal) + return false; + + MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2); + if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD) + return false; + + Register Base = Add2Def->getOperand(1).getReg(); + Register Imm2 = Add2Def->getOperand(2).getReg(); + auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI); + if (!MaybeImm2Val) + return false; + + // Pass the combined immediate to the apply function. + MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value; + MatchInfo.Base = Base; + return true; +} + +bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI, + PtrAddChain &MatchInfo) { + assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); + MachineIRBuilder MIB(MI); + LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg()); + auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm); + Observer.changingInstr(MI); + MI.getOperand(1).setReg(MatchInfo.Base); + MI.getOperand(2).setReg(NewOffset.getReg(0)); + Observer.changedInstr(MI); + return true; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp index be8efa8795f3..64023ecfad82 100644 --- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp +++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -179,8 +179,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, Known.Zero = KnownZeroOut; break; } - case TargetOpcode::G_GEP: { - // G_GEP is like G_ADD. FIXME: Is this true for all targets? + case TargetOpcode::G_PTR_ADD: { + // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets? 
LLT Ty = MRI.getType(MI.getOperand(1).getReg()); if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) break; @@ -373,6 +373,76 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, << Known.One.toString(16, false) << "\n"); } +unsigned GISelKnownBits::computeNumSignBits(Register R, + const APInt &DemandedElts, + unsigned Depth) { + MachineInstr &MI = *MRI.getVRegDef(R); + unsigned Opcode = MI.getOpcode(); + + if (Opcode == TargetOpcode::G_CONSTANT) + return MI.getOperand(1).getCImm()->getValue().getNumSignBits(); + + if (Depth == getMaxDepth()) + return 1; + + if (!DemandedElts) + return 1; // No demanded elts, better to assume we don't know anything. + + LLT DstTy = MRI.getType(R); + + // Handle the case where this is called on a register that does not have a + // type constraint. This is unlikely to occur except by looking through copies + // but it is possible for the initial register being queried to be in this + // state. + if (!DstTy.isValid()) + return 1; + + switch (Opcode) { + case TargetOpcode::COPY: { + MachineOperand &Src = MI.getOperand(1); + if (Src.getReg().isVirtual() && Src.getSubReg() == 0 && + MRI.getType(Src.getReg()).isValid()) { + // Don't increment Depth for this one since we didn't do any work. + return computeNumSignBits(Src.getReg(), DemandedElts, Depth); + } + + return 1; + } + case TargetOpcode::G_SEXT: { + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); + return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; + } + case TargetOpcode::G_TRUNC: { + Register Src = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src); + + // Check if the sign bits of source go down as far as the truncated value. + unsigned DstTyBits = DstTy.getScalarSizeInBits(); + unsigned NumSrcBits = SrcTy.getScalarSizeInBits(); + unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1); + if (NumSrcSignBits > (NumSrcBits - DstTyBits)) + return NumSrcSignBits - (NumSrcBits - DstTyBits); + break; + } + default: + break; + } + + // TODO: Handle target instructions + // TODO: Fall back to known bits + return 1; +} + +unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) { + LLT Ty = MRI.getType(R); + APInt DemandedElts = Ty.isVector() + ? APInt::getAllOnesValue(Ty.getNumElements()) + : APInt(1, 1); + return computeNumSignBits(R, DemandedElts, Depth); +} + void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 45cef4aca888..17eca2b0301c 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -56,6 +56,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -224,12 +225,12 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) { if (FrameIndices.find(&AI) != FrameIndices.end()) return FrameIndices[&AI]; - unsigned ElementSize = DL->getTypeAllocSize(AI.getAllocatedType()); - unsigned Size = + uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType()); + uint64_t Size = ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue(); // Always allocate at least one byte. 
- Size = std::max(Size, 1u); + Size = std::max<uint64_t>(Size, 1u); unsigned Alignment = AI.getAlignment(); if (!Alignment) @@ -466,7 +467,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) { return true; } - SL->findJumpTables(Clusters, &SI, DefaultMBB); + SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr); LLVM_DEBUG({ dbgs() << "Case clusters: "; @@ -885,13 +886,15 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr; for (unsigned i = 0; i < Regs.size(); ++i) { Register Addr; - MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8); + MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8); unsigned BaseAlign = getMemOpAlignment(LI); + AAMDNodes AAMetadata; + LI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8, - MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), Ranges, + MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -926,13 +929,15 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { for (unsigned i = 0; i < Vals.size(); ++i) { Register Addr; - MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8); + MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8); MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8); unsigned BaseAlign = getMemOpAlignment(SI); + AAMDNodes AAMetadata; + SI.getAAMetadata(AAMetadata); auto MMO = MF->getMachineMemOperand( Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8, - MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, + MinAlign(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr, SI.getSyncScopeID(), SI.getOrdering()); MIRBuilder.buildStore(Vals[i], Addr, *MMO); } @@ -1080,8 +1085,8 @@ bool IRTranslator::translateGetElementPtr(const User &U, if (Offset != 0) { LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); - BaseReg = - MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0); + BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0)) + .getReg(0); Offset = 0; } @@ -1100,14 +1105,14 @@ bool IRTranslator::translateGetElementPtr(const User &U, } else GepOffsetReg = IdxReg; - BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0); + BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0); } } if (Offset != 0) { auto OffsetMIB = MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset); - MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0)); + MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0)); return true; } @@ -1251,6 +1256,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { return TargetOpcode::G_FSQRT; case Intrinsic::trunc: return TargetOpcode::G_INTRINSIC_TRUNC; + case Intrinsic::readcyclecounter: + return TargetOpcode::G_READCYCLECOUNTER; } return Intrinsic::not_intrinsic; } @@ -1412,7 +1419,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, Register Op1 = getOrCreateVReg(*CI.getArgOperand(1)); Register Op2 = getOrCreateVReg(*CI.getArgOperand(2)); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) { 
+ TLI.isFMAFasterThanFMulAndFAdd(*MF, + TLI.getValueType(*DL, CI.getType()))) { // TODO: Revisit this to see if we should move this part of the // lowering to the combiner. MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2}, @@ -1518,6 +1526,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::sideeffect: // Discard annotate attributes, assumptions, and artificial side-effects. return true; + case Intrinsic::read_register: { + Value *Arg = CI.getArgOperand(0); + MIRBuilder.buildInstr(TargetOpcode::G_READ_REGISTER) + .addDef(getOrCreateVReg(CI)) + .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata())); + return true; + } } return false; } @@ -1587,7 +1602,13 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { const Function *F = CI.getCalledFunction(); // FIXME: support Windows dllimport function calls. - if (F && F->hasDLLImportStorageClass()) + if (F && (F->hasDLLImportStorageClass() || + (MF->getTarget().getTargetTriple().isOSWindows() && + F->hasExternalWeakLinkage()))) + return false; + + // FIXME: support control flow guard targets. + if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget)) return false; if (CI.isInlineAsm()) @@ -1683,6 +1704,10 @@ bool IRTranslator::translateInvoke(const User &U, if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) return false; + // FIXME: support control flow guard targets. + if (I.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget)) + return false; + // FIXME: support Windows exception handling. if (!isa<LandingPadInst>(EHPadBB->front())) return false; @@ -1908,11 +1933,14 @@ bool IRTranslator::translateExtractElement(const User &U, bool IRTranslator::translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder) { + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(cast<Constant>(U.getOperand(2)), Mask); + ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask); MIRBuilder.buildInstr(TargetOpcode::G_SHUFFLE_VECTOR) .addDef(getOrCreateVReg(U)) .addUse(getOrCreateVReg(*U.getOperand(0))) .addUse(getOrCreateVReg(*U.getOperand(1))) - .addShuffleMask(cast<Constant>(U.getOperand(2))); + .addShuffleMask(MaskAlloc); return true; } @@ -1950,11 +1978,14 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U, Register Cmp = getOrCreateVReg(*I.getCompareOperand()); Register NewVal = getOrCreateVReg(*I.getNewValOperand()); + AAMDNodes AAMetadata; + I.getAAMetadata(AAMetadata); + MIRBuilder.buildAtomicCmpXchgWithSuccess( OldValRes, SuccessRes, Addr, Cmp, NewVal, *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, DL->getTypeStoreSize(ValType), - getMemOpAlignment(I), AAMDNodes(), nullptr, + getMemOpAlignment(I), AAMetadata, nullptr, I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering())); return true; @@ -2019,12 +2050,15 @@ bool IRTranslator::translateAtomicRMW(const User &U, break; } + AAMDNodes AAMetadata; + I.getAAMetadata(AAMetadata); + MIRBuilder.buildAtomicRMW( Opcode, Res, Addr, Val, *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, DL->getTypeStoreSize(ResType), - getMemOpAlignment(I), AAMDNodes(), nullptr, - I.getSyncScopeID(), I.getOrdering())); + getMemOpAlignment(I), AAMetadata, + nullptr, I.getSyncScopeID(), I.getOrdering())); return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 28143b30d4e8..b9c90e69ddb2 100644 --- 
a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -60,7 +60,7 @@ bool InstructionSelector::isBaseWithConstantOffset( return false; MachineInstr *RootI = MRI.getVRegDef(Root.getReg()); - if (RootI->getOpcode() != TargetOpcode::G_GEP) + if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD) return false; MachineOperand &RHS = RootI->getOperand(2); diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 1593e21fe07e..e789e4a333dc 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" @@ -139,22 +140,13 @@ public: }; } // namespace -bool Legalizer::runOnMachineFunction(MachineFunction &MF) { - // If the ISel pipeline failed, do not bother running that pass. - if (MF.getProperties().hasProperty( - MachineFunctionProperties::Property::FailedISel)) - return false; - LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); - init(MF); - const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); - GISelCSEAnalysisWrapper &Wrapper = - getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); - MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); - - const size_t NumBlocks = MF.size(); +Legalizer::MFResult +Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, + ArrayRef<GISelChangeObserver *> AuxObservers, + MachineIRBuilder &MIRBuilder) { MachineRegisterInfo &MRI = MF.getRegInfo(); - // Populate Insts + // Populate worklists. InstListTy InstList; ArtifactListTy ArtifactList; ReversePostOrderTraversal<MachineFunction *> RPOT(&MF); @@ -177,48 +169,33 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { } ArtifactList.finalize(); InstList.finalize(); - std::unique_ptr<MachineIRBuilder> MIRBuilder; - GISelCSEInfo *CSEInfo = nullptr; - bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences() - ? EnableCSEInLegalizer - : TPC.isGISelCSEEnabled(); - if (EnableCSE) { - MIRBuilder = std::make_unique<CSEMIRBuilder>(); - CSEInfo = &Wrapper.get(TPC.getCSEConfig()); - MIRBuilder->setCSEInfo(CSEInfo); - } else - MIRBuilder = std::make_unique<MachineIRBuilder>(); - // This observer keeps the worklist updated. + // This observer keeps the worklists updated. LegalizerWorkListManager WorkListObserver(InstList, ArtifactList); - // We want both WorkListObserver as well as CSEInfo to observe all changes. - // Use the wrapper observer. + // We want both WorkListObserver as well as all the auxiliary observers (e.g. + // CSEInfo) to observe all changes. Use the wrapper observer. GISelObserverWrapper WrapperObserver(&WorkListObserver); - if (EnableCSE && CSEInfo) - WrapperObserver.addObserver(CSEInfo); + for (GISelChangeObserver *Observer : AuxObservers) + WrapperObserver.addObserver(Observer); + // Now install the observer as the delegate to MF. // This will keep all the observers notified about new insertions/deletions. 
RAIIDelegateInstaller DelInstall(MF, &WrapperObserver); - LegalizerHelper Helper(MF, WrapperObserver, *MIRBuilder.get()); - const LegalizerInfo &LInfo(Helper.getLegalizerInfo()); - LegalizationArtifactCombiner ArtCombiner(*MIRBuilder.get(), MF.getRegInfo(), - LInfo); + LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder); + LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI); auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) { WrapperObserver.erasingInstr(*DeadMI); }; - auto stopLegalizing = [&](MachineInstr &MI) { - Helper.MIRBuilder.stopObservingChanges(); - reportGISelFailure(MF, TPC, MORE, "gisel-legalize", - "unable to legalize instruction", MI); - }; bool Changed = false; SmallVector<MachineInstr *, 128> RetryList; do { + LLVM_DEBUG(dbgs() << "=== New Iteration ===\n"); assert(RetryList.empty() && "Expected no instructions in RetryList"); unsigned NumArtifacts = ArtifactList.size(); while (!InstList.empty()) { MachineInstr &MI = *InstList.pop_back_val(); - assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); + assert(isPreISelGenericOpcode(MI.getOpcode()) && + "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); MI.eraseFromParentAndMarkDBGValuesForRemoval(); @@ -234,11 +211,17 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // legalizing InstList may generate artifacts that allow // ArtifactCombiner to combine away them. if (isArtifact(MI)) { + LLVM_DEBUG(dbgs() << ".. Not legalized, moving to artifacts retry\n"); + assert(NumArtifacts == 0 && + "Artifacts are only expected in instruction list starting the " + "second iteration, but each iteration starting second must " + "start with an empty artifacts list"); + (void)NumArtifacts; RetryList.push_back(&MI); continue; } - stopLegalizing(MI); - return false; + Helper.MIRBuilder.stopObservingChanges(); + return {Changed, &MI}; } WorkListObserver.printNewInstrs(); Changed |= Res == LegalizerHelper::Legalized; @@ -246,18 +229,19 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // Try to combine the instructions in RetryList again if there // are new artifacts. If not, stop legalizing. if (!RetryList.empty()) { - if (ArtifactList.size() > NumArtifacts) { + if (!ArtifactList.empty()) { while (!RetryList.empty()) ArtifactList.insert(RetryList.pop_back_val()); } else { - MachineInstr *MI = *RetryList.begin(); - stopLegalizing(*MI); - return false; + LLVM_DEBUG(dbgs() << "No new artifacts created, not retrying!\n"); + Helper.MIRBuilder.stopObservingChanges(); + return {Changed, RetryList.front()}; } } while (!ArtifactList.empty()) { MachineInstr &MI = *ArtifactList.pop_back_val(); - assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); + assert(isPreISelGenericOpcode(MI.getOpcode()) && + "Expecting generic opcode"); if (isTriviallyDead(MI, MRI)) { LLVM_DEBUG(dbgs() << MI << "Is dead\n"); RemoveDeadInstFromLists(&MI); @@ -265,6 +249,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { continue; } SmallVector<MachineInstr *, 4> DeadInstructions; + LLVM_DEBUG(dbgs() << "Trying to combine: " << MI); if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions, WrapperObserver)) { WorkListObserver.printNewInstrs(); @@ -279,13 +264,58 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // If this was not an artifact (that could be combined away), this might // need special handling. 
Add it to InstList, so when it's processed // there, it has to be legal or specially handled. - else + else { + LLVM_DEBUG(dbgs() << ".. Not combined, moving to instructions list\n"); InstList.insert(&MI); + } } } while (!InstList.empty()); + return {Changed, /*FailedOn*/ nullptr}; +} + +bool Legalizer::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + LLVM_DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n'); + init(MF); + const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); + MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); + + const size_t NumBlocks = MF.size(); + + std::unique_ptr<MachineIRBuilder> MIRBuilder; + GISelCSEInfo *CSEInfo = nullptr; + bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences() + ? EnableCSEInLegalizer + : TPC.isGISelCSEEnabled(); + if (EnableCSE) { + MIRBuilder = std::make_unique<CSEMIRBuilder>(); + CSEInfo = &Wrapper.get(TPC.getCSEConfig()); + MIRBuilder->setCSEInfo(CSEInfo); + } else + MIRBuilder = std::make_unique<MachineIRBuilder>(); + + SmallVector<GISelChangeObserver *, 1> AuxObservers; + if (EnableCSE && CSEInfo) { + // We want CSEInfo in addition to WorkListObserver to observe all changes. + AuxObservers.push_back(CSEInfo); + } + + const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo(); + MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, *MIRBuilder); + + if (Result.FailedOn) { + reportGISelFailure(MF, TPC, MORE, "gisel-legalize", + "unable to legalize instruction", *Result.FailedOn); + return false; + } // For now don't support if new blocks are inserted - we would need to fix the - // outerloop for that. + // outer loop for that. 
if (MF.size() != NumBlocks) { MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure", MF.getFunction().getSubprogram(), @@ -294,6 +324,5 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { reportGISelFailure(MF, TPC, MORE, R); return false; } - - return Changed; + return Result.Changed; } diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 21512e543878..667e1a04dc34 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1075,6 +1075,28 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_BSWAP: + case TargetOpcode::G_BITREVERSE: { + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + SmallVector<Register, 2> SrcRegs, DstRegs; + unsigned NumParts = SizeOp0 / NarrowSize; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + for (unsigned i = 0; i < NumParts; ++i) { + auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, + {SrcRegs[NumParts - 1 - i]}); + DstRegs.push_back(DstPart.getReg(0)); + } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + + Observer.changedInstr(MI); + MI.eraseFromParent(); + return Legalized; + } } } @@ -1675,7 +1697,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_CONSTANT: { MachineOperand &SrcMO = MI.getOperand(1); LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits()); + unsigned ExtOpc = LI.getExtOpcodeForWideningConstant( + MRI.getType(MI.getOperand(0).getReg())); + assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT || + ExtOpc == TargetOpcode::G_ANYEXT) && + "Illegal Extend"); + const APInt &SrcVal = SrcMO.getCImm()->getValue(); + const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT) + ? 
SrcVal.sext(WideTy.getSizeInBits()) + : SrcVal.zext(WideTy.getSizeInBits()); Observer.changingInstr(MI); SrcMO.setCImm(ConstantInt::get(Ctx, Val)); @@ -1748,8 +1778,8 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_GEP: - assert(TypeIdx == 1 && "unable to legalize pointer of GEP"); + case TargetOpcode::G_PTR_ADD: + assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD"); Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); Observer.changedInstr(MI); @@ -1789,10 +1819,35 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { if (TypeIdx != 2) return UnableToLegalize; Observer.changingInstr(MI); + // TODO: Probably should be zext widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_INSERT_VECTOR_ELT: { + if (TypeIdx == 1) { + Observer.changingInstr(MI); + + Register VecReg = MI.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy); + + widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideVecTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + + if (TypeIdx == 2) { + Observer.changingInstr(MI); + // TODO: Probably should be zext + widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT); + Observer.changedInstr(MI); + } + + return Legalized; + } case TargetOpcode::G_FADD: case TargetOpcode::G_FMUL: case TargetOpcode::G_FSUB: @@ -1998,6 +2053,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { } case TargetOpcode::G_FMAD: return lowerFMad(MI); + case TargetOpcode::G_INTRINSIC_ROUND: + return lowerIntrinsicRound(MI); case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -2058,8 +2115,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); - Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), *SmallMMO); @@ -2083,7 +2141,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { default: llvm_unreachable("Unexpected opcode"); case TargetOpcode::G_LOAD: - MIRBuilder.buildAnyExt(DstReg, TmpReg); + MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg); break; case TargetOpcode::G_SEXTLOAD: MIRBuilder.buildSExt(DstReg, TmpReg); @@ -2126,12 +2184,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); - // Generate the GEP and truncating stores. + // Generate the PtrAdd and truncating stores. 
LLT PtrTy = MRI.getType(PtrReg); auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); - Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); - auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = + MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0)); MachineFunction &MF = MIRBuilder.getMF(); MachineMemOperand *LargeMMO = @@ -2254,6 +2313,12 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerExtract(MI); case G_INSERT: return lowerInsert(MI); + case G_BSWAP: + return lowerBswap(MI); + case G_BITREVERSE: + return lowerBitreverse(MI); + case G_READ_REGISTER: + return lowerReadRegister(MI); } } @@ -2883,7 +2948,7 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, unsigned ByteOffset = Offset / 8; Register NewAddrReg; - MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset); + MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset); MachineMemOperand *NewMMO = MF.getMachineMemOperand(MMO, ByteOffset, ByteSize); @@ -2960,6 +3025,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_BSWAP: case G_BITREVERSE: case G_SDIV: + case G_UDIV: + case G_SREM: + case G_UREM: case G_SMIN: case G_SMAX: case G_UMIN: @@ -3259,7 +3327,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_SMIN: case TargetOpcode::G_SMAX: case TargetOpcode::G_UMIN: - case TargetOpcode::G_UMAX: { + case TargetOpcode::G_UMAX: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMAXIMUM: { Observer.changingInstr(MI); moreElementsVectorSrc(MI, MoreTy, 1); moreElementsVectorSrc(MI, MoreTy, 2); @@ -3352,7 +3426,7 @@ void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]); Factors.push_back(Umulh.getReg(0)); } - // Add CarrySum from additons calculated for previous DstIdx. + // Add CarrySum from additions calculated for previous DstIdx. 
if (DstIdx != 1) { Factors.push_back(CarrySumPrevDstIdx); } @@ -3824,6 +3898,14 @@ LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { LLT DstTy = MRI.getType(Dst); LLT SrcTy = MRI.getType(Src); + if (SrcTy == LLT::scalar(1)) { + auto True = MIRBuilder.buildFConstant(DstTy, 1.0); + auto False = MIRBuilder.buildFConstant(DstTy, 0.0); + MIRBuilder.buildSelect(Dst, Src, True, False); + MI.eraseFromParent(); + return Legalized; + } + if (SrcTy != LLT::scalar(64)) return UnableToLegalize; @@ -3849,6 +3931,14 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { const LLT S32 = LLT::scalar(32); const LLT S1 = LLT::scalar(1); + if (SrcTy == S1) { + auto True = MIRBuilder.buildFConstant(DstTy, -1.0); + auto False = MIRBuilder.buildFConstant(DstTy, 0.0); + MIRBuilder.buildSelect(Dst, Src, True, False); + MI.eraseFromParent(); + return Legalized; + } + if (SrcTy != S64) return UnableToLegalize; @@ -3910,8 +4000,10 @@ LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt); MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit); + const LLT S1 = LLT::scalar(1); + MachineInstrBuilder FCMP = - MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold); + MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold); MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res); MI.eraseFromParent(); @@ -4042,6 +4134,33 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { } LegalizerHelper::LegalizeResult +LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + unsigned Flags = MI.getFlags(); + LLT Ty = MRI.getType(DstReg); + const LLT CondTy = Ty.changeElementSize(1); + + // result = trunc(src); + // if (src < 0.0 && src != result) + // result += -1.0. + + auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); + auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags); + + auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy, + SrcReg, Zero, Flags); + auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy, + SrcReg, Trunc, Flags); + auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc); + auto AddVal = MIRBuilder.buildSITOFP(Ty, And); + + MIRBuilder.buildFAdd(DstReg, Trunc, AddVal); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { const unsigned NumDst = MI.getNumOperands() - 1; const Register SrcReg = MI.getOperand(NumDst).getReg(); @@ -4083,10 +4202,7 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { LLT DstTy = MRI.getType(DstReg); LLT IdxTy = LLT::scalar(32); - const Constant *ShufMask = MI.getOperand(3).getShuffleMask(); - - SmallVector<int, 32> Mask; - ShuffleVectorInst::getShuffleMask(ShufMask, Mask); + ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); if (DstTy.isScalar()) { if (Src0Ty.isVector()) @@ -4151,7 +4267,7 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't // have to generate an extra instruction to negate the alloc and then use - // G_GEP to add the negative offset. + // G_PTR_ADD to add the negative offset. 
auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize); if (Align) { APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true); @@ -4275,3 +4391,99 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerBswap(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + const LLT Ty = MRI.getType(Src); + unsigned SizeInBytes = Ty.getSizeInBytes(); + unsigned BaseShiftAmt = (SizeInBytes - 1) * 8; + + // Swap most and least significant byte, set remaining bytes in Res to zero. + auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt); + auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt); + auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt); + auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft); + + // Set i-th high/low byte in Res to i-th low/high byte from Src. + for (unsigned i = 1; i < SizeInBytes / 2; ++i) { + // AND with Mask leaves byte i unchanged and sets remaining bytes to 0. + APInt APMask(SizeInBytes * 8, 0xFF << (i * 8)); + auto Mask = MIRBuilder.buildConstant(Ty, APMask); + auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i); + // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt. + auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask); + auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt); + Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft); + // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask. + auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt); + auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask); + Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight); + } + Res.getInstr()->getOperand(0).setReg(Dst); + + MI.eraseFromParent(); + return Legalized; +} + +//{ (Src & Mask) >> N } | { (Src << N) & Mask } +static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, + MachineInstrBuilder Src, APInt Mask) { + const LLT Ty = Dst.getLLTTy(*B.getMRI()); + MachineInstrBuilder C_N = B.buildConstant(Ty, N); + MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask); + auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N); + auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0); + return B.buildOr(Dst, LHS, RHS); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerBitreverse(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + const LLT Ty = MRI.getType(Src); + unsigned Size = Ty.getSizeInBits(); + + MachineInstrBuilder BSWAP = + MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src}); + + // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654 + // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4] + // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0] + MachineInstrBuilder Swap4 = + SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0))); + + // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76 + // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2] + // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC] + MachineInstrBuilder Swap2 = + SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC))); + + // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7 + // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1] + // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA] + SwapN(1, Dst, 
MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA))); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerReadRegister(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + const LLT Ty = MRI.getType(Dst); + const MDString *RegStr = cast<MDString>( + cast<MDNode>(MI.getOperand(1).getMetadata())->getOperand(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetLowering *TLI = STI.getTargetLowering(); + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF); + if (!Reg.isValid()) + return UnableToLegalize; + + MIRBuilder.buildCopy(Dst, Reg); + MI.eraseFromParent(); + return Legalized; +} diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 70045512fae5..02f6b39e0905 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -128,24 +128,26 @@ static bool mutationIsSane(const LegalizeRule &Rule, switch (Rule.getAction()) { case FewerElements: - case MoreElements: { if (!OldTy.isVector()) return false; - + LLVM_FALLTHROUGH; + case MoreElements: { + // MoreElements can go from scalar to vector. + const unsigned OldElts = OldTy.isVector() ? OldTy.getNumElements() : 1; if (NewTy.isVector()) { if (Rule.getAction() == FewerElements) { // Make sure the element count really decreased. - if (NewTy.getNumElements() >= OldTy.getNumElements()) + if (NewTy.getNumElements() >= OldElts) return false; } else { // Make sure the element count really increased. - if (NewTy.getNumElements() <= OldTy.getNumElements()) + if (NewTy.getNumElements() <= OldElts) return false; } } // Make sure the element type didn't change. - return NewTy.getScalarType() == OldTy.getElementType(); + return NewTy.getScalarType() == OldTy.getScalarType(); } case NarrowScalar: case WidenScalar: { @@ -685,6 +687,10 @@ bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI, return true; } +unsigned LegalizerInfo::getExtOpcodeForWideningConstant(LLT SmallTy) const { + return SmallTy.isByteSized() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; +} + /// \pre Type indices of every opcode form a dense set starting from 0. 
void LegalizerInfo::verify(const MCInstrInfo &MII) const { #ifndef NDEBUG diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp index f882ecbf5db3..1c4a668e5f31 100644 --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -10,9 +10,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/Localizer.h" -#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "localizer" @@ -28,7 +29,11 @@ INITIALIZE_PASS_END(Localizer, DEBUG_TYPE, "Move/duplicate certain instructions close to their use", false, false) -Localizer::Localizer() : MachineFunctionPass(ID) { } +Localizer::Localizer(std::function<bool(const MachineFunction &)> F) + : MachineFunctionPass(ID), DoNotRunPass(F) {} + +Localizer::Localizer() + : Localizer([](const MachineFunction &) { return false; }) {} void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); @@ -211,6 +216,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { MachineFunctionProperties::Property::FailedISel)) return false; + // Don't run the pass if the target asked so. + if (DoNotRunPass(MF)) + return false; + LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n'); init(MF); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index df770f6664ca..67d9dacda61b 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -219,19 +219,19 @@ void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0, assert((Res == Op0) && "type mismatch"); } -MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res, - const SrcOp &Op0, - const SrcOp &Op1) { +MachineInstrBuilder MachineIRBuilder::buildPtrAdd(const DstOp &Res, + const SrcOp &Op0, + const SrcOp &Op1) { assert(Res.getLLTTy(*getMRI()).isPointer() && Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type"); - return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1}); + return buildInstr(TargetOpcode::G_PTR_ADD, {Res}, {Op0, Op1}); } Optional<MachineInstrBuilder> -MachineIRBuilder::materializeGEP(Register &Res, Register Op0, - const LLT &ValueTy, uint64_t Value) { +MachineIRBuilder::materializePtrAdd(Register &Res, Register Op0, + const LLT &ValueTy, uint64_t Value) { assert(Res == 0 && "Res is a result argument"); assert(ValueTy.isScalar() && "invalid offset type"); @@ -242,7 +242,7 @@ MachineIRBuilder::materializeGEP(Register &Res, Register Op0, Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0)); auto Cst = buildConstant(ValueTy, Value); - return buildGEP(Res, Op0, Cst.getReg(0)); + return buildPtrAdd(Res, Op0, Cst.getReg(0)); } MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res, @@ -698,8 +698,9 @@ MachineInstrBuilder MachineIRBuilder::buildTrunc(const DstOp &Res, } MachineInstrBuilder MachineIRBuilder::buildFPTrunc(const DstOp &Res, - const SrcOp &Op) { - return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op); + const SrcOp &Op, + Optional<unsigned> Flags) { + return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags); } MachineInstrBuilder 
MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp index f0e35c65c53b..98e48f5fc1d5 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -32,6 +32,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" @@ -118,16 +119,16 @@ bool RegBankSelect::assignmentMatch( return false; const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI); - const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank; + const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank; // Reg is free of assignment, a simple assignment will make the // register bank to match. OnlyAssign = CurRegBank == nullptr; LLVM_DEBUG(dbgs() << "Does assignment already match: "; if (CurRegBank) dbgs() << *CurRegBank; else dbgs() << "none"; dbgs() << " against "; - assert(DesiredRegBrank && "The mapping must be valid"); - dbgs() << *DesiredRegBrank << '\n';); - return CurRegBank == DesiredRegBrank; + assert(DesiredRegBank && "The mapping must be valid"); + dbgs() << *DesiredRegBank << '\n';); + return CurRegBank == DesiredRegBank; } bool RegBankSelect::repairReg( @@ -259,11 +260,11 @@ uint64_t RegBankSelect::getRepairCost( return RBI->getBreakDownCost(ValMapping, CurRegBank); if (IsSameNumOfValues) { - const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank; + const RegisterBank *DesiredRegBank = ValMapping.BreakDown[0].RegBank; // If we repair a definition, swap the source and destination for // the repairing. if (MO.isDef()) - std::swap(CurRegBank, DesiredRegBrank); + std::swap(CurRegBank, DesiredRegBank); // TODO: It may be possible to actually avoid the copy. // If we repair something where the source is defined by a copy // and the source of that copy is on the right bank, we can reuse @@ -275,7 +276,7 @@ uint64_t RegBankSelect::getRepairCost( // into a new virtual register. // We would also need to propagate this information in the // repairing placement. - unsigned Cost = RBI->copyCost(*DesiredRegBrank, *CurRegBank, + unsigned Cost = RBI->copyCost(*DesiredRegBank, *CurRegBank, RBI->getSizeInBits(MO.getReg(), *MRI, *TRI)); // TODO: use a dedicated constant for ImpossibleCost. if (Cost != std::numeric_limits<unsigned>::max()) diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 3fcc55286beb..255ea693b5c4 100644 --- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -82,15 +82,18 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { const RegisterBank * RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (Register::isPhysicalRegister(Reg)) - return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI)); + if (Register::isPhysicalRegister(Reg)) { + // FIXME: This was probably a copy to a virtual register that does have a + // type we could use. 
+ return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT()); + } assert(Reg && "NoRegister does not have a register bank"); const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>()) return RB; if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) - return &getRegBankFromRegClass(*RC); + return &getRegBankFromRegClass(*RC, MRI.getType(Reg)); return nullptr; } @@ -108,15 +111,18 @@ RegisterBankInfo::getMinimalPhysRegClass(Register Reg, const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII, - const TargetRegisterInfo &TRI) const { + const MachineRegisterInfo &MRI) const { + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + // The mapping of the registers may be available via the // register class constraints. - const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, &TRI); + const TargetRegisterClass *RC = MI.getRegClassConstraint(OpIdx, &TII, TRI); if (!RC) return nullptr; - const RegisterBank &RegBank = getRegBankFromRegClass(*RC); + Register Reg = MI.getOperand(OpIdx).getReg(); + const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg)); // Sanity check that the target properly implemented getRegBankFromRegClass. assert(RegBank.covers(*RC) && "The mapping of the register bank does not make sense"); @@ -195,7 +201,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { if (!CurRegBank) { // If this is a target specific instruction, we can deduce // the register bank from the encoding constraints. - CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI); + CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, MRI); if (!CurRegBank) { // All our attempts failed, give up. 
CompleteMapping = false; @@ -444,7 +450,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { continue; } if (!MO.getReg()) { - LLVM_DEBUG(dbgs() << " is %%noreg, nothing to be done\n"); + LLVM_DEBUG(dbgs() << " is $noreg, nothing to be done\n"); continue; } assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns != diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 45618d7992ad..eeec2a5d536a 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -431,20 +431,3 @@ Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } - -MVT llvm::getMVTForLLT(LLT Ty) { - if (!Ty.isVector()) - return MVT::getIntegerVT(Ty.getSizeInBits()); - - return MVT::getVectorVT( - MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), - Ty.getNumElements()); -} - -LLT llvm::getLLTForMVT(MVT Ty) { - if (!Ty.isVector()) - return LLT::scalar(Ty.getSizeInBits()); - - return LLT::vector(Ty.getVectorNumElements(), - Ty.getVectorElementType().getSizeInBits()); -} diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp index d4fa45fcb405..5870e20d4227 100644 --- a/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/llvm/lib/CodeGen/GlobalMerge.cpp @@ -82,6 +82,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp index 6a0f98d2e2b4..65c2a37e5d43 100644 --- a/llvm/lib/CodeGen/HardwareLoops.cpp +++ b/llvm/lib/CodeGen/HardwareLoops.cpp @@ -15,25 +15,28 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/Pass.h" -#include "llvm/PassRegistry.h" -#include "llvm/PassSupport.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/PassRegistry.h" +#include "llvm/PassSupport.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" @@ -75,8 +78,44 @@ ForceGuardLoopEntry( STATISTIC(NumHWLoops, "Number of loops converted to hardware loops"); +#ifndef NDEBUG +static void debugHWLoopFailure(const StringRef DebugMsg, + Instruction *I) { + dbgs() << "HWLoops: " << DebugMsg; + if (I) + dbgs() << ' ' << *I; + else + dbgs() << '.'; + dbgs() << '\n'; +} +#endif + +static OptimizationRemarkAnalysis +createHWLoopAnalysis(StringRef RemarkName, Loop *L, Instruction *I) { + Value *CodeRegion = L->getHeader(); + DebugLoc DL = L->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location 
attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion); + R << "hardware-loop not created: "; + return R; +} + namespace { + void reportHWLoopFailure(const StringRef Msg, const StringRef ORETag, + OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I = nullptr) { + LLVM_DEBUG(debugHWLoopFailure(Msg, I)); + ORE->emit(createHWLoopAnalysis(ORETag, TheLoop, I) << Msg); + } + using TTI = TargetTransformInfo; class HardwareLoops : public FunctionPass { @@ -97,6 +136,7 @@ namespace { AU.addRequired<ScalarEvolutionWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); } // Try to convert the given Loop into a hardware loop. @@ -110,6 +150,7 @@ namespace { ScalarEvolution *SE = nullptr; LoopInfo *LI = nullptr; const DataLayout *DL = nullptr; + OptimizationRemarkEmitter *ORE = nullptr; const TargetTransformInfo *TTI = nullptr; DominatorTree *DT = nullptr; bool PreserveLCSSA = false; @@ -143,8 +184,9 @@ namespace { public: HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE, - const DataLayout &DL) : - SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()), + const DataLayout &DL, + OptimizationRemarkEmitter *ORE) : + SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()), ExitCount(Info.ExitCount), CountType(Info.CountType), ExitBranch(Info.ExitBranch), @@ -157,6 +199,7 @@ namespace { private: ScalarEvolution &SE; const DataLayout &DL; + OptimizationRemarkEmitter *ORE = nullptr; Loop *L = nullptr; Module *M = nullptr; const SCEV *ExitCount = nullptr; @@ -182,6 +225,7 @@ bool HardwareLoops::runOnFunction(Function &F) { DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DL = &F.getParent()->getDataLayout(); + ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr; PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); @@ -201,31 +245,39 @@ bool HardwareLoops::runOnFunction(Function &F) { // converted and the parent loop doesn't support containing a hardware loop. bool HardwareLoops::TryConvertLoop(Loop *L) { // Process nested loops first. - for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) - if (TryConvertLoop(*I)) + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { + if (TryConvertLoop(*I)) { + reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested", + ORE, L); return true; // Stop search. + } + } HardwareLoopInfo HWLoopInfo(L); - if (!HWLoopInfo.canAnalyze(*LI)) + if (!HWLoopInfo.canAnalyze(*LI)) { + reportHWLoopFailure("cannot analyze loop, irreducible control flow", + "HWLoopCannotAnalyze", ORE, L); return false; + } - if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) || - ForceHardwareLoops) { - - // Allow overriding of the counter width and loop decrement value. 
- if (CounterBitWidth.getNumOccurrences()) - HWLoopInfo.CountType = - IntegerType::get(M->getContext(), CounterBitWidth); + if (!ForceHardwareLoops && + !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) { + reportHWLoopFailure("it's not profitable to create a hardware-loop", + "HWLoopNotProfitable", ORE, L); + return false; + } - if (LoopDecrement.getNumOccurrences()) - HWLoopInfo.LoopDecrement = - ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); + // Allow overriding of the counter width and loop decrement value. + if (CounterBitWidth.getNumOccurrences()) + HWLoopInfo.CountType = + IntegerType::get(M->getContext(), CounterBitWidth); - MadeChange |= TryConvertLoop(HWLoopInfo); - return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); - } + if (LoopDecrement.getNumOccurrences()) + HWLoopInfo.LoopDecrement = + ConstantInt::get(HWLoopInfo.CountType, LoopDecrement); - return false; + MadeChange |= TryConvertLoop(HWLoopInfo); + return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop); } bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { @@ -234,8 +286,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L); if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop, - ForceHardwareLoopPHI)) + ForceHardwareLoopPHI)) { + // TODO: there can be many reasons a loop is not considered a + // candidate, so we should let isHardwareLoopCandidate fill in the + // reason and then report a better message here. + reportHWLoopFailure("loop is not a candidate", "HWLoopNoCandidate", ORE, L); return false; + } assert( (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) && @@ -249,7 +306,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { if (!Preheader) return false; - HardwareLoop HWLoop(HWLoopInfo, *SE, *DL); + HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE); HWLoop.Create(); ++NumHWLoops; return true; @@ -257,10 +314,13 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) { void HardwareLoop::Create() { LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n"); - + Value *LoopCountInit = InitLoopCount(); - if (!LoopCountInit) + if (!LoopCountInit) { + reportHWLoopFailure("could not safely create a loop count expression", + "HWLoopNotSafe", ORE, L); return; + } InsertIterationSetup(LoopCountInit); @@ -458,6 +518,7 @@ INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false) FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); } diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index d9caa5660695..7d64828aa482 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -35,7 +36,9 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include 
"llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" @@ -211,6 +214,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -432,6 +436,7 @@ char &llvm::IfConverterID = IfConverter::ID; INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { @@ -444,6 +449,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TRI = ST.getRegisterInfo(); BranchFolder::MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>()); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + ProfileSummaryInfo *PSI = + &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); MRI = &MF.getRegInfo(); SchedModel.init(&ST); @@ -454,7 +461,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { bool BFChange = false; if (!PreRegAlloc) { // Tail merge tend to expose more if-conversion opportunities. - BranchFolder BF(true, false, MBFI, *MBPI); + BranchFolder BF(true, false, MBFI, *MBPI, PSI); auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); BFChange = BF.OptimizeFunction( MF, TII, ST.getRegisterInfo(), @@ -596,7 +603,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { - BranchFolder BF(false, false, MBFI, *MBPI); + BranchFolder BF(false, false, MBFI, *MBPI, PSI); auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); BF.OptimizeFunction( MF, TII, MF.getSubtarget().getRegisterInfo(), diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp index b7dcaec90106..0bbedb0a5ea6 100644 --- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -50,6 +50,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -371,7 +372,7 @@ ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI, // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. 
- if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() && + if (!(MI.mayLoadOrStore() && !MI.isPredicable() && -PageSize < Offset && Offset < PageSize)) return SR_Unsuitable; @@ -697,7 +698,7 @@ void ImplicitNullChecks::rewriteNullChecks( if (auto *DepMI = NC.getOnlyDependency()) { for (auto &MO : DepMI->operands()) { - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) + if (!MO.isReg() || !MO.getReg() || !MO.isDef() || MO.isDead()) continue; if (!NC.getNotNullSucc()->isLiveIn(MO.getReg())) NC.getNotNullSucc()->addLiveIn(MO.getReg()); diff --git a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp index 7ac093ba4a71..4473a139d3ad 100644 --- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 2408f18678e4..ed3e159ac566 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -534,7 +534,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg, // may have more remats than physregs, we're guaranteed to fail to assign // one. // At the moment, we only handle this for STATEPOINTs since they're the only - // psuedo op where we've seen this. If we start seeing other instructions + // pseudo op where we've seen this. If we start seeing other instructions // with the same problem, we need to revisit this. return (MI.getOpcode() != TargetOpcode::STATEPOINT); } @@ -543,8 +543,7 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg, bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) { // Analyze instruction SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops; - MIBundleOperands::VirtRegInfo RI = - MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops); + VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, VirtReg.reg, &Ops); if (!RI.Reads) return false; @@ -782,7 +781,7 @@ static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, /// foldMemoryOperand - Try folding stack slot references in Ops into their /// instructions. /// -/// @param Ops Operand indices from analyzeVirtReg(). +/// @param Ops Operand indices from AnalyzeVirtRegInBundle(). /// @param LoadMI Load instruction to use instead of stack slot when non-null. /// @return True on success. bool InlineSpiller:: @@ -851,8 +850,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops, // Skip non-Defs, including undef uses and internal reads. if (MO->isUse()) continue; - MIBundleOperands::PhysRegInfo RI = - MIBundleOperands(*FoldMI).analyzePhysReg(Reg, &TRI); + PhysRegInfo RI = AnalyzePhysRegInBundle(*FoldMI, Reg, &TRI); if (RI.FullyDefined) continue; // FoldMI does not define this physreg. Remove the LI segment. @@ -992,8 +990,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Analyze instruction. SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; - MIBundleOperands::VirtRegInfo RI = - MIBundleOperands(*MI).analyzeVirtReg(Reg, &Ops); + VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops); // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. 
@@ -1430,7 +1427,7 @@ void HoistSpillHelper::runHoistSpills( } // For spills in SpillsToKeep with LiveReg set (i.e., not original spill), // save them to SpillsToIns. - for (const auto Ent : SpillsToKeep) { + for (const auto &Ent : SpillsToKeep) { if (Ent.second) SpillsToIns[Ent.first->getBlock()] = Ent.second; } @@ -1489,7 +1486,7 @@ void HoistSpillHelper::hoistAllSpills() { LLVM_DEBUG({ dbgs() << "Finally inserted spills in BB: "; - for (const auto Ispill : SpillsToIns) + for (const auto &Ispill : SpillsToIns) dbgs() << Ispill.first->getNumber() << " "; dbgs() << "\nFinally removed spills in BB: "; for (const auto Rspill : SpillsToRm) @@ -1504,7 +1501,7 @@ void HoistSpillHelper::hoistAllSpills() { StackIntvl.getValNumInfo(0)); // Insert hoisted spills. - for (auto const Insert : SpillsToIns) { + for (auto const &Insert : SpillsToIns) { MachineBasicBlock *BB = Insert.first; unsigned LiveReg = Insert.second; MachineBasicBlock::iterator MI = IPA.getLastInsertPointIter(OrigLI, *BB); diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 14bc560a561c..1f9b436378d2 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -58,6 +58,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp index 770c4952d169..42691b8a6154 100644 --- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp +++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1167,7 +1168,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad, // If there are users outside the set to be eliminated, we abort the // transformation. No gain can be expected. - for (const auto &U : I->users()) { + for (auto *U : I->users()) { if (Is.find(dyn_cast<Instruction>(U)) == Is.end()) return false; } diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp index 8cbd8bcaeabb..4461a235d6c1 100644 --- a/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -50,14 +50,6 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, return NewCI; } -// VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) && \ - !defined(setjmp_undefined_for_msvc) -# pragma push_macro("setjmp") -# undef setjmp -# define setjmp_undefined_for_msvc -#endif - /// Emit the code to lower bswap of V before the specified instruction IP. static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!"); @@ -254,34 +246,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } - // The setjmp/longjmp intrinsics should only exist in the code if it was - // never optimized (ie, right out of the CFE), or if it has been hacked on - // by the lowerinvoke pass. In both cases, the right thing to do is to - // convert the call to an explicit setjmp or longjmp call. 
- case Intrinsic::setjmp: { - Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(), - Type::getInt32Ty(Context)); - if (!CI->getType()->isVoidTy()) - CI->replaceAllUsesWith(V); - break; - } - case Intrinsic::sigsetjmp: - if (!CI->getType()->isVoidTy()) - CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); - break; - - case Intrinsic::longjmp: { - ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(), - Type::getVoidTy(Context)); - break; - } - - case Intrinsic::siglongjmp: { - // Insert the call to abort - ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), - Type::getVoidTy(Context)); - break; - } case Intrinsic::ctpop: CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI)); break; diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 1c362aec6e67..50c178ff7598 100644 --- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -48,8 +48,8 @@ void LLVMTargetMachine::initAsmInfo() { STI.reset(TheTarget.createMCSubtargetInfo( getTargetTriple().str(), getTargetCPU(), getTargetFeatureString())); - MCAsmInfo *TmpAsmInfo = - TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str()); + MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo( + *MRI, getTargetTriple().str(), Options.MCOptions); // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, // and if the old one gets included then MCAsmInfo will be NULL and // we'll crash later. diff --git a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp index cef5085ae079..63a0d0c1c43e 100644 --- a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp @@ -14,6 +14,7 @@ ///===---------------------------------------------------------------------===// #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp index f1b237d83e8c..2226c10b49a4 100644 --- a/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -57,6 +57,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -89,8 +90,28 @@ static Register isDbgValueDescribedByReg(const MachineInstr &MI) { return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register(); } +/// If \p Op is a stack or frame register return true, otherwise return false. +/// This is used to avoid basing the debug entry values on the registers, since +/// we do not support it at the moment. 
+static bool isRegOtherThanSPAndFP(const MachineOperand &Op, + const MachineInstr &MI, + const TargetRegisterInfo *TRI) { + if (!Op.isReg()) + return false; + + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register FP = TRI->getFrameRegister(*MF); + Register Reg = Op.getReg(); + + return Reg && Reg != SP && Reg != FP; +} + namespace { +using DefinedRegsSet = SmallSet<Register, 32>; + class LiveDebugValues : public MachineFunctionPass { private: const TargetRegisterInfo *TRI; @@ -123,60 +144,6 @@ private: using FragmentInfo = DIExpression::FragmentInfo; using OptFragmentInfo = Optional<DIExpression::FragmentInfo>; - /// Storage for identifying a potentially inlined instance of a variable, - /// or a fragment thereof. - class DebugVariable { - const DILocalVariable *Variable; - OptFragmentInfo Fragment; - const DILocation *InlinedAt; - - /// Fragment that will overlap all other fragments. Used as default when - /// caller demands a fragment. - static const FragmentInfo DefaultFragment; - - public: - DebugVariable(const DILocalVariable *Var, OptFragmentInfo &&FragmentInfo, - const DILocation *InlinedAt) - : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {} - - DebugVariable(const DILocalVariable *Var, OptFragmentInfo &FragmentInfo, - const DILocation *InlinedAt) - : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {} - - DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr, - const DILocation *InlinedAt) - : DebugVariable(Var, DIExpr->getFragmentInfo(), InlinedAt) {} - - DebugVariable(const MachineInstr &MI) - : DebugVariable(MI.getDebugVariable(), - MI.getDebugExpression()->getFragmentInfo(), - MI.getDebugLoc()->getInlinedAt()) {} - - const DILocalVariable *getVar() const { return Variable; } - const OptFragmentInfo &getFragment() const { return Fragment; } - const DILocation *getInlinedAt() const { return InlinedAt; } - - const FragmentInfo getFragmentDefault() const { - return Fragment.getValueOr(DefaultFragment); - } - - static bool isFragmentDefault(FragmentInfo &F) { - return F == DefaultFragment; - } - - bool operator==(const DebugVariable &Other) const { - return std::tie(Variable, Fragment, InlinedAt) == - std::tie(Other.Variable, Other.Fragment, Other.InlinedAt); - } - - bool operator<(const DebugVariable &Other) const { - return std::tie(Variable, Fragment, InlinedAt) < - std::tie(Other.Variable, Other.Fragment, Other.InlinedAt); - } - }; - - friend struct llvm::DenseMapInfo<DebugVariable>; - /// A pair of debug variable and value location. struct VarLoc { // The location at which a spilled variable resides. It consists of a @@ -205,7 +172,9 @@ private: RegisterKind, SpillLocKind, ImmediateKind, - EntryValueKind + EntryValueKind, + EntryValueBackupKind, + EntryValueCopyBackupKind } Kind = InvalidKind; /// The value location. 
Stored separately to avoid repeatedly @@ -220,14 +189,15 @@ private: } Loc; VarLoc(const MachineInstr &MI, LexicalScopes &LS) - : Var(MI), Expr(MI.getDebugExpression()), MI(MI), - UVS(MI.getDebugLoc(), LS) { + : Var(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()), + Expr(MI.getDebugExpression()), MI(MI), UVS(MI.getDebugLoc(), LS) { static_assert((sizeof(Loc) == sizeof(uint64_t)), "hash does not cover all members of Loc"); assert(MI.isDebugValue() && "not a DBG_VALUE"); assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE"); if (int RegNo = isDbgValueDescribedByReg(MI)) { - Kind = MI.isDebugEntryValue() ? EntryValueKind : RegisterKind; + Kind = RegisterKind; Loc.RegNo = RegNo; } else if (MI.getOperand(0).isImm()) { Kind = ImmediateKind; @@ -239,17 +209,50 @@ private: Kind = ImmediateKind; Loc.CImm = MI.getOperand(0).getCImm(); } - assert((Kind != ImmediateKind || !MI.isDebugEntryValue()) && - "entry values must be register locations"); + + // We create the debug entry values from the factory functions rather than + // from this ctor. + assert(Kind != EntryValueKind && !isEntryBackupLoc()); } /// Take the variable and machine-location in DBG_VALUE MI, and build an /// entry location using the given expression. static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS, - const DIExpression *EntryExpr) { + const DIExpression *EntryExpr, unsigned Reg) { VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); VL.Kind = EntryValueKind; VL.Expr = EntryExpr; + VL.Loc.RegNo = Reg; + return VL; + } + + /// Take the variable and machine-location from the DBG_VALUE (from the + /// function entry), and build an entry value backup location. The backup + /// location will turn into the normal location if the backup is valid at + /// the time of the primary location clobbering. + static VarLoc CreateEntryBackupLoc(const MachineInstr &MI, + LexicalScopes &LS, + const DIExpression *EntryExpr) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Kind = EntryValueBackupKind; + VL.Expr = EntryExpr; + return VL; + } + + /// Take the variable and machine-location from the DBG_VALUE (from the + /// function entry), and build a copy of an entry value backup location by + /// setting the register location to NewReg. + static VarLoc CreateEntryCopyBackupLoc(const MachineInstr &MI, + LexicalScopes &LS, + const DIExpression *EntryExpr, + unsigned NewReg) { + VarLoc VL(MI, LS); + assert(VL.Kind == RegisterKind); + VL.Kind = EntryValueCopyBackupKind; + VL.Expr = EntryExpr; + VL.Loc.RegNo = NewReg; return VL; } @@ -288,8 +291,11 @@ private: switch (Kind) { case EntryValueKind: // An entry value is a register location -- but with an updated - // expression. - return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, Expr); + // expression. The register location of such DBG_VALUE is always the one + // from the entry DBG_VALUE, it does not matter if the entry value was + // copied in to another register due to some optimizations. + return BuildMI(MF, DbgLoc, IID, Indirect, MI.getOperand(0).getReg(), + Var, Expr); case RegisterKind: // Register locations are like the source DBG_VALUE, but with the // register number from this VarLoc. 
@@ -308,8 +314,11 @@ private: MachineOperand MO = MI.getOperand(0); return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr); } + case EntryValueBackupKind: + case EntryValueCopyBackupKind: case InvalidKind: - llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc"); + llvm_unreachable( + "Tried to produce DBG_VALUE for invalid or backup VarLoc"); } llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum"); } @@ -317,6 +326,27 @@ private: /// Is the Loc field a constant or constant object? bool isConstant() const { return Kind == ImmediateKind; } + /// Check if the Loc field is an entry backup location. + bool isEntryBackupLoc() const { + return Kind == EntryValueBackupKind || Kind == EntryValueCopyBackupKind; + } + + /// If this variable is described by a register holding the entry value, + /// return it, otherwise return 0. + unsigned getEntryValueBackupReg() const { + if (Kind == EntryValueBackupKind) + return Loc.RegNo; + return 0; + } + + /// If this variable is described by a register holding the copy of the + /// entry value, return it, otherwise return 0. + unsigned getEntryValueCopyBackupReg() const { + if (Kind == EntryValueCopyBackupKind) + return Loc.RegNo; + return 0; + } + /// If this variable is described by a register, return it, /// otherwise return 0. unsigned isDescribedByReg() const { @@ -336,6 +366,8 @@ private: switch (Kind) { case RegisterKind: case EntryValueKind: + case EntryValueBackupKind: + case EntryValueCopyBackupKind: dbgs() << printReg(Loc.RegNo, TRI); break; case SpillLocKind: @@ -349,11 +381,17 @@ private: llvm_unreachable("Invalid VarLoc in dump method"); } - dbgs() << ", \"" << Var.getVar()->getName() << "\", " << *Expr << ", "; + dbgs() << ", \"" << Var.getVariable()->getName() << "\", " << *Expr + << ", "; if (Var.getInlinedAt()) dbgs() << "!" << Var.getInlinedAt()->getMetadataID() << ")\n"; else - dbgs() << "(null))\n"; + dbgs() << "(null))"; + + if (isEntryBackupLoc()) + dbgs() << " (backup loc)\n"; + else + dbgs() << "\n"; } #endif @@ -369,7 +407,6 @@ private: } }; - using DebugParamMap = SmallDenseMap<const DILocalVariable *, MachineInstr *>; using VarLocMap = UniqueVector<VarLoc>; using VarLocSet = SparseBitVector<>; using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>; @@ -395,10 +432,18 @@ private: /// This holds the working set of currently open ranges. For fast /// access, this is done both as a set of VarLocIDs, and a map of /// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all - /// previous open ranges for the same variable. + /// previous open ranges for the same variable. In addition, we keep + /// two different maps (Vars/EntryValuesBackupVars), so erase/insert + /// methods act differently depending on whether a VarLoc is primary + /// location or backup one. In the case the VarLoc is backup location + /// we will erase/insert from the EntryValuesBackupVars map, otherwise + /// we perform the operation on the Vars. class OpenRangesSet { VarLocSet VarLocs; + // Map the DebugVariable to recent primary location ID. SmallDenseMap<DebugVariable, unsigned, 8> Vars; + // Map the DebugVariable to recent backup location ID. + SmallDenseMap<DebugVariable, unsigned, 8> EntryValuesBackupVars; OverlapMap &OverlappingFragments; public: @@ -406,40 +451,38 @@ private: const VarLocSet &getVarLocs() const { return VarLocs; } - /// Terminate all open ranges for Var by removing it from the set. - void erase(DebugVariable Var); + /// Terminate all open ranges for VL.Var by removing it from the set. 
+ void erase(const VarLoc &VL); /// Terminate all open ranges listed in \c KillSet by removing /// them from the set. - void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs) { - VarLocs.intersectWithComplement(KillSet); - for (unsigned ID : KillSet) - Vars.erase(VarLocIDs[ID].Var); - } + void erase(const VarLocSet &KillSet, const VarLocMap &VarLocIDs); /// Insert a new range into the set. - void insert(unsigned VarLocID, DebugVariable Var) { - VarLocs.set(VarLocID); - Vars.insert({Var, VarLocID}); - } + void insert(unsigned VarLocID, const VarLoc &VL); /// Insert a set of ranges. void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) { for (unsigned Id : ToLoad) { - const VarLoc &Var = Map[Id]; - insert(Id, Var.Var); + const VarLoc &VarL = Map[Id]; + insert(Id, VarL); } } + llvm::Optional<unsigned> getEntryValueBackup(DebugVariable Var); + /// Empty the set. void clear() { VarLocs.clear(); Vars.clear(); + EntryValuesBackupVars.clear(); } /// Return whether the set is empty or not. bool empty() const { - assert(Vars.empty() == VarLocs.empty() && "open ranges are inconsistent"); + assert(Vars.empty() == EntryValuesBackupVars.empty() && + Vars.empty() == VarLocs.empty() && + "open ranges are inconsistent"); return VarLocs.empty(); } }; @@ -456,6 +499,14 @@ private: bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF, unsigned &Reg); + /// Returns true if the given machine instruction is a debug value which we + /// can emit entry values for. + /// + /// Currently, we generate debug entry values only for parameters that are + /// unmodified throughout the function and located in a register. + bool isEntryValueCandidate(const MachineInstr &MI, + const DefinedRegsSet &Regs) const; + /// If a given instruction is identified as a spill, return the spill location /// and set \p Reg to the spilled register. 
Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI, @@ -473,23 +524,23 @@ private: VarLocMap &VarLocIDs); void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers); + bool removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, const VarLoc &EntryVL); void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals, SparseBitVector<> &KillSet); + void recordEntryValue(const MachineInstr &MI, + const DefinedRegsSet &DefinedRegs, + OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs); void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers); void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals); + VarLocMap &VarLocIDs, TransferMap &Transfers); bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges, VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs); void process(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, - TransferMap &Transfers, DebugParamMap &DebugEntryVals, - OverlapMap &OverlapFragments, - VarToFragments &SeenFragments); + VarLocMap &VarLocIDs, TransferMap &Transfers); void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, OverlapMap &OLapMap); @@ -532,46 +583,10 @@ public: } // end anonymous namespace -namespace llvm { - -template <> struct DenseMapInfo<LiveDebugValues::DebugVariable> { - using DV = LiveDebugValues::DebugVariable; - using OptFragmentInfo = LiveDebugValues::OptFragmentInfo; - using FragmentInfo = LiveDebugValues::FragmentInfo; - - // Empty key: no key should be generated that has no DILocalVariable. - static inline DV getEmptyKey() { - return DV(nullptr, OptFragmentInfo(), nullptr); - } - - // Difference in tombstone is that the Optional is meaningful - static inline DV getTombstoneKey() { - return DV(nullptr, OptFragmentInfo({0, 0}), nullptr); - } - - static unsigned getHashValue(const DV &D) { - unsigned HV = 0; - const OptFragmentInfo &Fragment = D.getFragment(); - if (Fragment) - HV = DenseMapInfo<FragmentInfo>::getHashValue(*Fragment); - - return hash_combine(D.getVar(), HV, D.getInlinedAt()); - } - - static bool isEqual(const DV &A, const DV &B) { return A == B; } -}; - -} // namespace llvm - //===----------------------------------------------------------------------===// // Implementation //===----------------------------------------------------------------------===// -const DIExpression::FragmentInfo - LiveDebugValues::DebugVariable::DefaultFragment = { - std::numeric_limits<uint64_t>::max(), - std::numeric_limits<uint64_t>::min()}; - char LiveDebugValues::ID = 0; char &llvm::LiveDebugValuesID = LiveDebugValues::ID; @@ -592,38 +607,72 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const { } /// Erase a variable from the set of open ranges, and additionally erase any -/// fragments that may overlap it. -void LiveDebugValues::OpenRangesSet::erase(DebugVariable Var) { +/// fragments that may overlap it. If the VarLoc is a buckup location, erase +/// the variable from the EntryValuesBackupVars set, indicating we should stop +/// tracking its backup entry location. Otherwise, if the VarLoc is primary +/// location, erase the variable from the Vars set. 
+void LiveDebugValues::OpenRangesSet::erase(const VarLoc &VL) { // Erasure helper. - auto DoErase = [this](DebugVariable VarToErase) { - auto It = Vars.find(VarToErase); - if (It != Vars.end()) { + auto DoErase = [VL, this](DebugVariable VarToErase) { + auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; + auto It = EraseFrom->find(VarToErase); + if (It != EraseFrom->end()) { unsigned ID = It->second; VarLocs.reset(ID); - Vars.erase(It); + EraseFrom->erase(It); } }; + DebugVariable Var = VL.Var; + // Erase the variable/fragment that ends here. DoErase(Var); // Extract the fragment. Interpret an empty fragment as one that covers all // possible bits. - FragmentInfo ThisFragment = Var.getFragmentDefault(); + FragmentInfo ThisFragment = Var.getFragmentOrDefault(); // There may be fragments that overlap the designated fragment. Look them up // in the pre-computed overlap map, and erase them too. - auto MapIt = OverlappingFragments.find({Var.getVar(), ThisFragment}); + auto MapIt = OverlappingFragments.find({Var.getVariable(), ThisFragment}); if (MapIt != OverlappingFragments.end()) { for (auto Fragment : MapIt->second) { LiveDebugValues::OptFragmentInfo FragmentHolder; - if (!DebugVariable::isFragmentDefault(Fragment)) + if (!DebugVariable::isDefaultFragment(Fragment)) FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment); - DoErase({Var.getVar(), FragmentHolder, Var.getInlinedAt()}); + DoErase({Var.getVariable(), FragmentHolder, Var.getInlinedAt()}); } } } +void LiveDebugValues::OpenRangesSet::erase(const VarLocSet &KillSet, + const VarLocMap &VarLocIDs) { + VarLocs.intersectWithComplement(KillSet); + for (unsigned ID : KillSet) { + const VarLoc *VL = &VarLocIDs[ID]; + auto *EraseFrom = VL->isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; + EraseFrom->erase(VL->Var); + } +} + +void LiveDebugValues::OpenRangesSet::insert(unsigned VarLocID, + const VarLoc &VL) { + auto *InsertInto = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars; + VarLocs.set(VarLocID); + InsertInto->insert({VL.Var, VarLocID}); +} + +/// Return the Loc ID of an entry value backup location, if it exists for the +/// variable. +llvm::Optional<unsigned> +LiveDebugValues::OpenRangesSet::getEntryValueBackup(DebugVariable Var) { + auto It = EntryValuesBackupVars.find(Var); + if (It != EntryValuesBackupVars.end()) + return It->second; + + return llvm::None; +} + //===----------------------------------------------------------------------===// // Debug Range Extension Implementation //===----------------------------------------------------------------------===// @@ -642,7 +691,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF, Out << "MBB: " << BB.getNumber() << ":\n"; for (unsigned VLL : L) { const VarLoc &VL = VarLocIDs[VLL]; - Out << " Var: " << VL.Var.getVar()->getName(); + Out << " Var: " << VL.Var.getVariable()->getName(); Out << " MI: "; VL.dump(TRI, Out); } @@ -666,6 +715,62 @@ LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) { return {Reg, Offset}; } +/// Try to salvage the debug entry value if we encounter a new debug value +/// describing the same parameter, otherwise stop tracking the value. Return +/// true if we should stop tracking the entry value, otherwise return false. +bool LiveDebugValues::removeEntryValue(const MachineInstr &MI, + OpenRangesSet &OpenRanges, + VarLocMap &VarLocIDs, + const VarLoc &EntryVL) { + // Skip the DBG_VALUE which is the debug entry value itself. 
+  if (MI.isIdenticalTo(EntryVL.MI))
+    return false;
+
+  // If the parameter's location is not a register location, we cannot track
+  // the entry value any longer. In addition, if the debug expression from the
+  // DBG_VALUE is not empty, we can assume the parameter's value has changed,
+  // indicating that we should stop tracking its entry value as well.
+  if (!MI.getOperand(0).isReg() ||
+      MI.getDebugExpression()->getNumElements() != 0)
+    return true;
+
+  // If the DBG_VALUE comes from a copy instruction that copies the entry value,
+  // it means the parameter's value has not changed and we should be able to use
+  // its entry value.
+  bool TrySalvageEntryValue = false;
+  Register Reg = MI.getOperand(0).getReg();
+  auto I = std::next(MI.getReverseIterator());
+  const MachineOperand *SrcRegOp, *DestRegOp;
+  if (I != MI.getParent()->rend()) {
+    // TODO: Try to keep tracking of an entry value if we encounter a propagated
+    // DBG_VALUE describing the copy of the entry value. (Propagated entry value
+    // does not indicate the parameter modification.)
+    auto DestSrc = TII->isCopyInstr(*I);
+    if (!DestSrc)
+      return true;
+
+    SrcRegOp = DestSrc->Source;
+    DestRegOp = DestSrc->Destination;
+    if (Reg != DestRegOp->getReg())
+      return true;
+    TrySalvageEntryValue = true;
+  }
+
+  if (TrySalvageEntryValue) {
+    for (unsigned ID : OpenRanges.getVarLocs()) {
+      const VarLoc &VL = VarLocIDs[ID];
+      if (!VL.isEntryBackupLoc())
+        continue;
+
+      if (VL.getEntryValueCopyBackupReg() == Reg &&
+          VL.MI.getOperand(0).getReg() == SrcRegOp->getReg())
+        return false;
+    }
+  }
+
+  return true;
+}
+
 /// End all previous ranges related to @MI and start a new range from @MI
 /// if it is a DBG_VALUE instr.
 void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
@@ -680,18 +785,33 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
   assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
          "Expected inlined-at fields to agree");
-  // End all previous ranges of Var.
   DebugVariable V(Var, Expr, InlinedAt);
-  OpenRanges.erase(V);
-  // Add the VarLoc to OpenRanges from this DBG_VALUE.
+  // Check if this DBG_VALUE indicates a parameter's value changing.
+  // If that is the case, we should stop tracking its entry value.
+  auto EntryValBackupID = OpenRanges.getEntryValueBackup(V);
+  if (Var->isParameter() && EntryValBackupID) {
+    const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID];
+    if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) {
+      LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
+                 MI.print(dbgs(), /*IsStandalone*/ false,
+                          /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
+                          /*AddNewLine*/ true, TII));
+      OpenRanges.erase(EntryVL);
+    }
+  }
+
   unsigned ID;
   if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() ||
       MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) {
     // Use normal VarLoc constructor for registers and immediates.
     VarLoc VL(MI, LS);
+    // End all previous ranges of VL.Var.
+    OpenRanges.erase(VL);
+
     ID = VarLocIDs.insert(VL);
-    OpenRanges.insert(ID, VL.Var);
+    // Add the VarLoc to OpenRanges from this DBG_VALUE.
+    OpenRanges.insert(ID, VL);
   } else if (MI.hasOneMemOperand()) {
     llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?");
   } else {
@@ -701,32 +821,30 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
   }
 }
+/// Turn the entry value backup locations into primary locations.
void LiveDebugValues::emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers, - DebugParamMap &DebugEntryVals, SparseBitVector<> &KillSet) { for (unsigned ID : KillSet) { - if (!VarLocIDs[ID].Var.getVar()->isParameter()) + if (!VarLocIDs[ID].Var.getVariable()->isParameter()) continue; - const MachineInstr *CurrDebugInstr = &VarLocIDs[ID].MI; + auto DebugVar = VarLocIDs[ID].Var; + auto EntryValBackupID = OpenRanges.getEntryValueBackup(DebugVar); - // If parameter's DBG_VALUE is not in the map that means we can't - // generate parameter's entry value. - if (!DebugEntryVals.count(CurrDebugInstr->getDebugVariable())) + // If the parameter has the entry value backup, it means we should + // be able to use its entry value. + if (!EntryValBackupID) continue; - auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()]; - DIExpression *NewExpr = DIExpression::prepend( - ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue); - - VarLoc EntryLoc = VarLoc::CreateEntryLoc(*ParamDebugInstr, LS, NewExpr); - - unsigned EntryValLocID = VarLocIDs.insert(EntryLoc); - Transfers.push_back({&MI, EntryValLocID}); - OpenRanges.insert(EntryValLocID, EntryLoc.Var); + const VarLoc &EntryVL = VarLocIDs[*EntryValBackupID]; + VarLoc EntryLoc = + VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr, EntryVL.Loc.RegNo); + unsigned EntryValueID = VarLocIDs.insert(EntryLoc); + Transfers.push_back({&MI, EntryValueID}); + OpenRanges.insert(EntryValueID, EntryLoc); } } @@ -741,23 +859,21 @@ void LiveDebugValues::insertTransferDebugPair( unsigned NewReg) { const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI; - auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr, - &VarLocIDs](VarLoc &VL) { + auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &VarLocIDs](VarLoc &VL) { unsigned LocId = VarLocIDs.insert(VL); // Close this variable's previous location range. - DebugVariable V(*DebugInstr); - OpenRanges.erase(V); + OpenRanges.erase(VL); // Record the new location as an open range, and a postponed transfer // inserting a DBG_VALUE for this location. - OpenRanges.insert(LocId, VL.Var); + OpenRanges.insert(LocId, VL); TransferDebugPair MIP = {&MI, LocId}; Transfers.push_back(MIP); }; - // End all previous ranges of Var. - OpenRanges.erase(VarLocIDs[OldVarID].Var); + // End all previous ranges of VL.Var. + OpenRanges.erase(VarLocIDs[OldVarID]); switch (Kind) { case TransferKind::TransferCopy: { assert(NewReg && @@ -788,8 +904,6 @@ void LiveDebugValues::insertTransferDebugPair( case TransferKind::TransferRestore: { assert(NewReg && "No register supplied when handling a restore of a debug value"); - MachineFunction *MF = MI.getMF(); - DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent()); // DebugInstr refers to the pre-spill location, therefore we can reuse // its expression. VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg); @@ -807,7 +921,7 @@ void LiveDebugValues::insertTransferDebugPair( /// A definition of a register may mark the end of a range. 
 void LiveDebugValues::transferRegisterDef(
     MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
-    TransferMap &Transfers, DebugParamMap &DebugEntryVals) {
+    TransferMap &Transfers) {
   MachineFunction *MF = MI.getMF();
   const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
   unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
@@ -841,8 +955,7 @@ void LiveDebugValues::transferRegisterDef(
     if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
       auto &TM = TPC->getTM<TargetMachine>();
       if (TM.Options.EnableDebugEntryValues)
-        emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals,
-                        KillSet);
+        emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet);
     }
 }
@@ -980,12 +1093,12 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
     if (TKind == TransferKind::TransferSpill &&
         VarLocIDs[ID].isDescribedByReg() == Reg) {
       LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
-                        << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+                        << VarLocIDs[ID].Var.getVariable()->getName() << ")\n");
     } else if (TKind == TransferKind::TransferRestore &&
               VarLocIDs[ID].Kind == VarLoc::SpillLocKind &&
               VarLocIDs[ID].Loc.SpillLocation == *Loc) {
       LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
-                        << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+                        << VarLocIDs[ID].Var.getVariable()->getName() << ")\n");
     } else
       continue;
     insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind,
@@ -1001,13 +1114,17 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
                                            OpenRangesSet &OpenRanges,
                                            VarLocMap &VarLocIDs,
                                            TransferMap &Transfers) {
-  const MachineOperand *SrcRegOp, *DestRegOp;
+  auto DestSrc = TII->isCopyInstr(MI);
+  if (!DestSrc)
+    return;
+
+  const MachineOperand *DestRegOp = DestSrc->Destination;
+  const MachineOperand *SrcRegOp = DestSrc->Source;
-  if (!TII->isCopyInstr(MI, SrcRegOp, DestRegOp) || !SrcRegOp->isKill() ||
-      !DestRegOp->isDef())
+  if (!DestRegOp->isDef())
     return;
-  auto isCalleSavedReg = [&](unsigned Reg) {
+  auto isCalleeSavedReg = [&](unsigned Reg) {
     for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
       if (CalleeSavedRegs.test(*RAI))
         return true;
@@ -1022,7 +1139,31 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
   // included, there would be a great chance that it is going to be clobbered
   // soon. It is more likely that previous register location, which is callee
   // saved, is going to stay unclobbered longer, even if it is killed.
-  if (!isCalleSavedReg(DestReg))
+  if (!isCalleeSavedReg(DestReg))
+    return;
+
+  // Remember an entry value movement. If we encounter a new debug value of
+  // a parameter describing only a move of the value, rather than a
+  // modification of it, we are still able to use the entry value if needed.
+  if (isRegOtherThanSPAndFP(*DestRegOp, MI, TRI)) {
+    for (unsigned ID : OpenRanges.getVarLocs()) {
+      if (VarLocIDs[ID].getEntryValueBackupReg() == SrcReg) {
+        LLVM_DEBUG(dbgs() << "Copy of the entry value: "; MI.dump(););
+        VarLoc EntryValLocCopyBackup = VarLoc::CreateEntryCopyBackupLoc(
+            VarLocIDs[ID].MI, LS, VarLocIDs[ID].Expr, DestReg);
+
+        // Stop tracking the original entry value.
+        OpenRanges.erase(VarLocIDs[ID]);
+
+        // Start tracking the entry value copy.
+ unsigned EntryValCopyLocID = VarLocIDs.insert(EntryValLocCopyBackup); + OpenRanges.insert(EntryValCopyLocID, EntryValLocCopyBackup); + break; + } + } + } + + if (!SrcRegOp->isKill()) return; for (unsigned ID : OpenRanges.getVarLocs()) { @@ -1070,26 +1211,27 @@ bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB, void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments, OverlapMap &OverlappingFragments) { - DebugVariable MIVar(MI); - FragmentInfo ThisFragment = MIVar.getFragmentDefault(); + DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + FragmentInfo ThisFragment = MIVar.getFragmentOrDefault(); // If this is the first sighting of this variable, then we are guaranteed // there are currently no overlapping fragments either. Initialize the set // of seen fragments, record no overlaps for the current one, and return. - auto SeenIt = SeenFragments.find(MIVar.getVar()); + auto SeenIt = SeenFragments.find(MIVar.getVariable()); if (SeenIt == SeenFragments.end()) { SmallSet<FragmentInfo, 4> OneFragment; OneFragment.insert(ThisFragment); - SeenFragments.insert({MIVar.getVar(), OneFragment}); + SeenFragments.insert({MIVar.getVariable(), OneFragment}); - OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}}); + OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}}); return; } // If this particular Variable/Fragment pair already exists in the overlap // map, it has already been accounted for. auto IsInOLapMap = - OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}}); + OverlappingFragments.insert({{MIVar.getVariable(), ThisFragment}, {}}); if (!IsInOLapMap.second) return; @@ -1107,7 +1249,7 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, // Mark the previously seen fragment as being overlapped by the current // one. auto ASeenFragmentsOverlaps = - OverlappingFragments.find({MIVar.getVar(), ASeenFragment}); + OverlappingFragments.find({MIVar.getVariable(), ASeenFragment}); assert(ASeenFragmentsOverlaps != OverlappingFragments.end() && "Previously seen var fragment has no vector of overlaps"); ASeenFragmentsOverlaps->second.push_back(ThisFragment); @@ -1117,16 +1259,11 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI, AllSeenFragments.insert(ThisFragment); } -/// This routine creates OpenRanges and OutLocs. +/// This routine creates OpenRanges. 
void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges, - VarLocInMBB &OutLocs, VarLocMap &VarLocIDs, - TransferMap &Transfers, - DebugParamMap &DebugEntryVals, - OverlapMap &OverlapFragments, - VarToFragments &SeenFragments) { + VarLocMap &VarLocIDs, TransferMap &Transfers) { transferDebugValue(MI, OpenRanges, VarLocIDs); - transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers, - DebugEntryVals); + transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers); transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers); transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers); } @@ -1175,7 +1312,7 @@ bool LiveDebugValues::join( if (!InLocsT.empty()) { for (auto ID : InLocsT) dbgs() << " gathered candidate incoming var: " - << VarLocIDs[ID].Var.getVar()->getName() << "\n"; + << VarLocIDs[ID].Var.getVariable()->getName() << "\n"; } }); @@ -1190,7 +1327,7 @@ bool LiveDebugValues::join( if (!VarLocIDs[ID].dominates(MBB)) { KillSet.set(ID); LLVM_DEBUG({ - auto Name = VarLocIDs[ID].Var.getVar()->getName(); + auto Name = VarLocIDs[ID].Var.getVariable()->getName(); dbgs() << " killing " << Name << ", it doesn't dominate MBB\n"; }); } @@ -1247,6 +1384,8 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, // The ID location is live-in to MBB -- work out what kind of machine // location it is and create a DBG_VALUE. const VarLoc &DiffIt = VarLocIDs[ID]; + if (DiffIt.isEntryBackupLoc()) + continue; MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent()); MBB.insert(MBB.instr_begin(), MI); @@ -1256,6 +1395,87 @@ void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs, } } +bool LiveDebugValues::isEntryValueCandidate( + const MachineInstr &MI, const DefinedRegsSet &DefinedRegs) const { + assert(MI.isDebugValue() && "This must be DBG_VALUE."); + + // TODO: Add support for local variables that are expressed in terms of + // parameters entry values. + // TODO: Add support for modified arguments that can be expressed + // by using its entry value. + auto *DIVar = MI.getDebugVariable(); + if (!DIVar->isParameter()) + return false; + + // Do not consider parameters that belong to an inlined function. + if (MI.getDebugLoc()->getInlinedAt()) + return false; + + // Do not consider indirect debug values (TODO: explain why). + if (MI.isIndirectDebugValue()) + return false; + + // Only consider parameters that are described using registers. Parameters + // that are passed on the stack are not yet supported, so ignore debug + // values that are described by the frame or stack pointer. + if (!isRegOtherThanSPAndFP(MI.getOperand(0), MI, TRI)) + return false; + + // If a parameter's value has been propagated from the caller, then the + // parameter's DBG_VALUE may be described using a register defined by some + // instruction in the entry block, in which case we shouldn't create an + // entry value. + if (DefinedRegs.count(MI.getOperand(0).getReg())) + return false; + + // TODO: Add support for parameters that have a pre-existing debug expressions + // (e.g. fragments, or indirect parameters using DW_OP_deref). + if (MI.getDebugExpression()->getNumElements() > 0) + return false; + + return true; +} + +/// Collect all register defines (including aliases) for the given instruction. 
+static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
+                           const TargetRegisterInfo *TRI) {
+  for (const MachineOperand &MO : MI.operands())
+    if (MO.isReg() && MO.isDef() && MO.getReg())
+      for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+        Regs.insert(*AI);
+}
+
+/// This routine records the entry values of function parameters. The values
+/// could be used as backup values. If we lose track of some unmodified
+/// parameters, the backup values will be used as primary locations.
+void LiveDebugValues::recordEntryValue(const MachineInstr &MI,
+                                       const DefinedRegsSet &DefinedRegs,
+                                       OpenRangesSet &OpenRanges,
+                                       VarLocMap &VarLocIDs) {
+  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+    auto &TM = TPC->getTM<TargetMachine>();
+    if (!TM.Options.EnableDebugEntryValues)
+      return;
+  }
+
+  DebugVariable V(MI.getDebugVariable(), MI.getDebugExpression(),
+                  MI.getDebugLoc()->getInlinedAt());
+
+  if (!isEntryValueCandidate(MI, DefinedRegs) ||
+      OpenRanges.getEntryValueBackup(V))
+    return;
+
+  LLVM_DEBUG(dbgs() << "Creating the backup entry location: "; MI.dump(););
+
+  // Create the entry value and use it as a backup location. It remains valid
+  // until the parameter is modified.
+  DIExpression *NewExpr =
+      DIExpression::prepend(MI.getDebugExpression(), DIExpression::EntryValue);
+  VarLoc EntryValLocAsBackup = VarLoc::CreateEntryBackupLoc(MI, LS, NewExpr);
+  unsigned EntryValLocID = VarLocIDs.insert(EntryValLocAsBackup);
+  OpenRanges.insert(EntryValLocID, EntryValLocAsBackup);
+}
+
 /// Calculate the liveness information for the given machine function and
 /// extend ranges across basic blocks.
 bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
@@ -1266,12 +1486,13 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
   bool MBBJoined = false;
   VarLocMap VarLocIDs;         // Map VarLoc<>unique ID for use in bitvectors.
-  OverlapMap OverlapFragments; // Map of overlapping variable fragments
+  OverlapMap OverlapFragments; // Map of overlapping variable fragments.
   OpenRangesSet OpenRanges(OverlapFragments);
                               // Ranges that are open until end of bb.
   VarLocInMBB OutLocs;         // Ranges that exist beyond bb.
   VarLocInMBB InLocs;          // Ranges that are incoming after joining.
-  TransferMap Transfers;       // DBG_VALUEs associated with spills.
+  TransferMap Transfers;       // DBG_VALUEs associated with transfers (such as
+                               // spills, copies and restores).
   VarLocInMBB PendingInLocs;   // Ranges that are incoming after joining, but
                                // that we have deferred creating DBG_VALUE insts
                                // for immediately.
@@ -1291,42 +1512,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
                       std::greater<unsigned int>>
       Pending;
-  // Besides parameter's modification, check whether a DBG_VALUE is inlined
-  // in order to deduce whether the variable that it tracks comes from
-  // a different function. If that is the case we can't track its entry value.
- auto IsUnmodifiedFuncParam = [&](const MachineInstr &MI) { - auto *DIVar = MI.getDebugVariable(); - return DIVar->isParameter() && DIVar->isNotModified() && - !MI.getDebugLoc()->getInlinedAt(); - }; - - const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - Register FP = TRI->getFrameRegister(MF); - auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool { - return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP; - }; - - // Working set of currently collected debug variables mapped to DBG_VALUEs - // representing candidates for production of debug entry values. - DebugParamMap DebugEntryVals; + // Set of register defines that are seen when traversing the entry block + // looking for debug entry value candidates. + DefinedRegsSet DefinedRegs; - MachineBasicBlock &First_MBB = *(MF.begin()); // Only in the case of entry MBB collect DBG_VALUEs representing // function parameters in order to generate debug entry values for them. - // Currently, we generate debug entry values only for parameters that are - // unmodified throughout the function and located in a register. - // TODO: Add support for parameters that are described as fragments. - // TODO: Add support for modified arguments that can be expressed - // by using its entry value. - // TODO: Add support for local variables that are expressed in terms of - // parameters entry values. - for (auto &MI : First_MBB) - if (MI.isDebugValue() && IsUnmodifiedFuncParam(MI) && - !MI.isIndirectDebugValue() && IsRegOtherThanSPAndFP(MI.getOperand(0)) && - !DebugEntryVals.count(MI.getDebugVariable()) && - !MI.getDebugExpression()->isFragment()) - DebugEntryVals[MI.getDebugVariable()] = &MI; + MachineBasicBlock &First_MBB = *(MF.begin()); + for (auto &MI : First_MBB) { + collectRegDefs(MI, DefinedRegs, TRI); + if (MI.isDebugValue()) + recordEntryValue(MI, DefinedRegs, OpenRanges, VarLocIDs); + } // Initialize per-block structures and scan for fragment overlaps. for (auto &MBB : MF) { @@ -1379,13 +1576,12 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { MBBJoined = false; Changed = true; // Now that we have started to extend ranges across BBs we need to - // examine spill instructions to see whether they spill registers that - // correspond to user variables. + // examine spill, copy and restore instructions to see whether they + // operate with registers that correspond to user variables. // First load any pending inlocs. 
OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs); for (auto &MI : *MBB) - process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, - DebugEntryVals, OverlapFragments, SeenFragments); + process(MI, OpenRanges, VarLocIDs, Transfers); OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs); LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, @@ -1439,8 +1635,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); - TFI->determineCalleeSaves(MF, CalleeSavedRegs, - std::make_unique<RegScavenger>().get()); + TFI->getCalleeSaves(MF, CalleeSavedRegs); LS.initialize(MF); bool Changed = ExtendRanges(MF); diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp index 2dd462fc72b3..2cc547a6b741 100644 --- a/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -49,6 +49,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -142,51 +143,22 @@ namespace { class LDVImpl; -/// A UserValue is uniquely identified by the source variable it refers to -/// (Variable), the expression describing how to get the value (Expression) and -/// the specific usage (InlinedAt). InlinedAt differentiates both between -/// inline and non-inline functions, and multiple inlined instances in the same -/// scope. FIXME: The only part of the Expression which matters for UserValue -/// identification is the fragment part. -class UserValueIdentity { -private: - /// The debug info variable we are part of. - const DILocalVariable *Variable; - /// Any complex address expression. - const DIExpression *Expression; - /// Function usage identification. - const DILocation *InlinedAt; - -public: - UserValueIdentity(const DILocalVariable *Var, const DIExpression *Expr, - const DILocation *IA) - : Variable(Var), Expression(Expr), InlinedAt(IA) {} - - bool match(const DILocalVariable *Var, const DIExpression *Expr, - const DILocation *IA) const { - // FIXME: The fragment should be part of the identity, but not - // other things in the expression like stack values. - return Var == Variable && Expr == Expression && IA == InlinedAt; - } - - bool match(const UserValueIdentity &Other) const { - return match(Other.Variable, Other.Expression, Other.InlinedAt); - } - - unsigned hash_value() const { - return hash_combine(Variable, Expression, InlinedAt); - } -}; - /// A user value is a part of a debug info user variable. /// /// A DBG_VALUE instruction notes that (a sub-register of) a virtual register /// holds part of a user variable. The part is identified by a byte offset. +/// +/// UserValues are grouped into equivalence classes for easier searching. Two +/// user values are related if they refer to the same variable, or if they are +/// held by the same virtual register. The equivalence class is the transitive +/// closure of that relation. class UserValue { const DILocalVariable *Variable; ///< The debug info variable we are part of. const DIExpression *Expression; ///< Any complex address expression. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. + UserValue *leader; ///< Equivalence class leader. + UserValue *next = nullptr; ///< Next value in equivalence class, or null. 
/// Numbered locations referenced by locmap. SmallVector<MachineOperand, 4> locations; @@ -207,15 +179,49 @@ class UserValue { LiveIntervals &LIS); public: - UserValue(const UserValue &) = delete; - /// Create a new UserValue. UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L, LocMap::Allocator &alloc) - : Variable(var), Expression(expr), dl(std::move(L)), locInts(alloc) {} + : Variable(var), Expression(expr), dl(std::move(L)), leader(this), + locInts(alloc) {} + + /// Get the leader of this value's equivalence class. + UserValue *getLeader() { + UserValue *l = leader; + while (l != l->leader) + l = l->leader; + return leader = l; + } + + /// Return the next UserValue in the equivalence class. + UserValue *getNext() const { return next; } + + /// Does this UserValue match the parameters? + bool match(const DILocalVariable *Var, const DIExpression *Expr, + const DILocation *IA) const { + // FIXME: The fragment should be part of the equivalence class, but not + // other things in the expression like stack values. + return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA; + } - UserValueIdentity getId() { - return UserValueIdentity(Variable, Expression, dl->getInlinedAt()); + /// Merge equivalence classes. + static UserValue *merge(UserValue *L1, UserValue *L2) { + L2 = L2->getLeader(); + if (!L1) + return L2; + L1 = L1->getLeader(); + if (L1 == L2) + return L1; + // Splice L2 before L1's members. + UserValue *End = L2; + while (End->next) { + End->leader = L1; + End = End->next; + } + End->leader = L1; + End->next = L1->next; + L1->next = L2; + return L1; } /// Return the location number that matches Loc. @@ -250,6 +256,25 @@ public: return locations.size() - 1; } + /// Remove (recycle) a location number. If \p LocNo still is used by the + /// locInts nothing is done. + void removeLocationIfUnused(unsigned LocNo) { + // Bail out if LocNo still is used. + for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) { + DbgValueLocation Loc = I.value(); + if (Loc.locNo() == LocNo) + return; + } + // Remove the entry in the locations vector, and adjust all references to + // location numbers above the removed entry. + locations.erase(locations.begin() + LocNo); + for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { + DbgValueLocation Loc = I.value(); + if (!Loc.isUndef() && Loc.locNo() > LocNo) + I.setValueUnchecked(Loc.changeLocNo(Loc.locNo() - 1)); + } + } + /// Ensure that all virtual register locations are mapped. void mapVirtRegs(LDVImpl *LDV); @@ -327,29 +352,7 @@ public: void print(raw_ostream &, const TargetRegisterInfo *); }; -} // namespace -namespace llvm { -template <> struct DenseMapInfo<UserValueIdentity> { - static UserValueIdentity getEmptyKey() { - auto Key = DenseMapInfo<DILocalVariable *>::getEmptyKey(); - return UserValueIdentity(Key, nullptr, nullptr); - } - static UserValueIdentity getTombstoneKey() { - auto Key = DenseMapInfo<DILocalVariable *>::getTombstoneKey(); - return UserValueIdentity(Key, nullptr, nullptr); - } - static unsigned getHashValue(const UserValueIdentity &Val) { - return Val.hash_value(); - } - static bool isEqual(const UserValueIdentity &LHS, - const UserValueIdentity &RHS) { - return LHS.match(RHS); - } -}; -} // namespace llvm - -namespace { /// A user label is a part of a debug info user label. class UserLabel { const DILabel *Label; ///< The debug info label we are part of. @@ -401,20 +404,20 @@ class LDVImpl { /// All allocated UserLabel instances. 
SmallVector<std::unique_ptr<UserLabel>, 2> userLabels; - /// Map virtual register to UserValues which use it. - using VRMap = DenseMap<unsigned, SmallVector<UserValue *, 4>>; - VRMap VirtRegToUserVals; + /// Map virtual register to eq class leader. + using VRMap = DenseMap<unsigned, UserValue *>; + VRMap virtRegToEqClass; - /// Map unique UserValue identity to UserValue. - using UVMap = DenseMap<UserValueIdentity, UserValue *>; - UVMap UserVarMap; + /// Map user variable to eq class leader. + using UVMap = DenseMap<const DILocalVariable *, UserValue *>; + UVMap userVarMap; /// Find or create a UserValue. UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr, const DebugLoc &DL); - /// Find the UserValues for VirtReg or null. - SmallVectorImpl<UserValue *> *lookupVirtReg(unsigned VirtReg); + /// Find the EC leader for VirtReg or null. + UserValue *lookupVirtReg(unsigned VirtReg); /// Add DBG_VALUE instruction to our maps. /// @@ -454,8 +457,8 @@ public: MF = nullptr; userValues.clear(); userLabels.clear(); - VirtRegToUserVals.clear(); - UserVarMap.clear(); + virtRegToEqClass.clear(); + userVarMap.clear(); // Make sure we call emitDebugValues if the machine function was modified. assert((!ModifiedMF || EmitDone) && "Dbg values are not emitted in LDV"); @@ -463,8 +466,8 @@ public: ModifiedMF = false; } - /// Map virtual register to a UserValue. - void mapVirtReg(unsigned VirtReg, UserValue *UV); + /// Map virtual register to an equivalence class. + void mapVirtReg(unsigned VirtReg, UserValue *EC); /// Replace all references to OldReg with NewRegs. void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs); @@ -503,7 +506,7 @@ static void printExtendedName(raw_ostream &OS, const DINode *Node, const DILocation *DL) { const LLVMContext &Ctx = Node->getContext(); StringRef Res; - unsigned Line; + unsigned Line = 0; if (const auto *V = dyn_cast<const DILocalVariable>(Node)) { Res = V->getName(); Line = V->getLine(); @@ -572,27 +575,31 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { UserValue *LDVImpl::getUserValue(const DILocalVariable *Var, const DIExpression *Expr, const DebugLoc &DL) { - auto Ident = UserValueIdentity(Var, Expr, DL->getInlinedAt()); - UserValue *&UVEntry = UserVarMap[Ident]; - - if (UVEntry) - return UVEntry; + UserValue *&Leader = userVarMap[Var]; + if (Leader) { + UserValue *UV = Leader->getLeader(); + Leader = UV; + for (; UV; UV = UV->getNext()) + if (UV->match(Var, Expr, DL->getInlinedAt())) + return UV; + } - userValues.push_back(std::make_unique<UserValue>(Var, Expr, DL, allocator)); - return UVEntry = userValues.back().get(); + userValues.push_back( + std::make_unique<UserValue>(Var, Expr, DL, allocator)); + UserValue *UV = userValues.back().get(); + Leader = UserValue::merge(Leader, UV); + return UV; } -void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *UV) { +void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) { assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs"); - assert(UserVarMap.find(UV->getId()) != UserVarMap.end() && - "UserValue should exist in UserVarMap"); - VirtRegToUserVals[VirtReg].push_back(UV); + UserValue *&Leader = virtRegToEqClass[VirtReg]; + Leader = UserValue::merge(Leader, EC); } -SmallVectorImpl<UserValue *> *LDVImpl::lookupVirtReg(unsigned VirtReg) { - VRMap::iterator Itr = VirtRegToUserVals.find(VirtReg); - if (Itr != VirtRegToUserVals.end()) - return &Itr->getSecond(); +UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { + if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) + return 
UV->getLeader(); return nullptr; } @@ -1086,23 +1093,14 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, } } - // Finally, remove any remaining OldLocNo intervals and OldLocNo itself. - locations.erase(locations.begin() + OldLocNo); - LocMapI.goToBegin(); - while (LocMapI.valid()) { - DbgValueLocation v = LocMapI.value(); - if (v.locNo() == OldLocNo) { - LLVM_DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' - << LocMapI.stop() << ")\n"); - LocMapI.erase(); - } else { - // Undef values always have location number UndefLocNo, so don't change - // locNo in that case. See getLocationNo(). - if (!v.isUndef() && v.locNo() > OldLocNo) - LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1)); - ++LocMapI; - } - } + // Finally, remove OldLocNo unless it is still used by some interval in the + // locInts map. One case when OldLocNo still is in use is when the register + // has been spilled. In such situations the spilled register is kept as a + // location until rewriteLocations is called (VirtRegMap is mapping the old + // register to the spill slot). So for a while we can have locations that map + // to virtual registers that have been removed from both the MachineFunction + // and from LiveIntervals. + removeLocationIfUnused(OldLocNo); LLVM_DEBUG({ dbgs() << "Split result: \t"; @@ -1129,18 +1127,16 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) { bool DidChange = false; - if (auto *UserVals = lookupVirtReg(OldReg)) - for (auto *UV : *UserVals) - DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); + for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) + DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); if (!DidChange) return; // Map all of the new virtual registers. - if (auto *UserVals = lookupVirtReg(OldReg)) - for (auto *UV : *UserVals) - for (unsigned i = 0; i != NewRegs.size(); ++i) - mapVirtReg(NewRegs[i], UV); + UserValue *UV = lookupVirtReg(OldReg); + for (unsigned i = 0; i != NewRegs.size(); ++i) + mapVirtReg(NewRegs[i], UV); } void LiveDebugVariables:: diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp index 54ac46f2e7ce..930dc116205a 100644 --- a/llvm/lib/CodeGen/LiveInterval.cpp +++ b/llvm/lib/CodeGen/LiveInterval.cpp @@ -883,7 +883,8 @@ void LiveInterval::clearSubRanges() { static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, LaneBitmask LaneMask, const SlotIndexes &Indexes, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI, + unsigned ComposeSubRegIdx) { // Phys reg should not be tracked at subreg level. // Same for noreg (Reg == 0). if (!Register::isVirtualRegister(Reg) || !Reg) @@ -905,7 +906,12 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, continue; if (MOI->getReg() != Reg) continue; - if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none()) + LaneBitmask OrigMask = TRI.getSubRegIndexLaneMask(MOI->getSubReg()); + LaneBitmask ExpectedDefMask = + ComposeSubRegIdx + ? 
TRI.composeSubRegIndexLaneMask(ComposeSubRegIdx, OrigMask) + : OrigMask; + if ((ExpectedDefMask & LaneMask).none()) continue; hasDef = true; break; @@ -924,7 +930,8 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR, void LiveInterval::refineSubRanges( BumpPtrAllocator &Allocator, LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange &)> Apply, - const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) { + const SlotIndexes &Indexes, const TargetRegisterInfo &TRI, + unsigned ComposeSubRegIdx) { LaneBitmask ToApply = LaneMask; for (SubRange &SR : subranges()) { LaneBitmask SRMask = SR.LaneMask; @@ -944,8 +951,10 @@ void LiveInterval::refineSubRanges( MatchingRange = createSubRangeFrom(Allocator, Matching, SR); // Now that the subrange is split in half, make sure we // only keep in the subranges the VNIs that touch the related half. - stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI); - stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI); + stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI, + ComposeSubRegIdx); + stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI, + ComposeSubRegIdx); } Apply(*MatchingRange); ToApply &= ~Matching; diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 2989930ad093..9c80282bc59e 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -191,12 +191,12 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { } /// Compute the live interval of a virtual register, based on defs and uses. -void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { +bool LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); - computeDeadValues(LI, nullptr); + return computeDeadValues(LI, nullptr); } void LiveIntervals::computeVirtRegs() { @@ -204,7 +204,12 @@ void LiveIntervals::computeVirtRegs() { unsigned Reg = Register::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; - createAndComputeVirtRegInterval(Reg); + LiveInterval &LI = createEmptyInterval(Reg); + bool NeedSplit = computeVirtRegInterval(LI); + if (NeedSplit) { + SmallVector<LiveInterval*, 8> SplitLIs; + splitSeparateComponents(LI, SplitLIs); + } } } @@ -500,6 +505,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, bool LiveIntervals::computeDeadValues(LiveInterval &LI, SmallVectorImpl<MachineInstr*> *dead) { bool MayHaveSplitComponents = false; + bool HaveDeadDef = false; + for (VNInfo *VNI : LI.valnos) { if (VNI->isUnused()) continue; @@ -530,6 +537,10 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, MachineInstr *MI = getInstructionFromIndex(Def); assert(MI && "No instruction defining live value"); MI->addRegisterDead(LI.reg, TRI); + if (HaveDeadDef) + MayHaveSplitComponents = true; + HaveDeadDef = true; + if (dead && MI->allDefsAreDead()) { LLVM_DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); dead->push_back(MI); @@ -1061,9 +1072,9 @@ private: // Kill flags shouldn't be used while live intervals exist, they will be // reinserted by VirtRegRewriter. 
if (MachineInstr *KillMI = LIS.getInstructionFromIndex(OldIdxIn->end)) - for (MIBundleOperands MO(*KillMI); MO.isValid(); ++MO) - if (MO->isReg() && MO->isUse()) - MO->setIsKill(false); + for (MachineOperand &MOP : mi_bundle_ops(*KillMI)) + if (MOP.isReg() && MOP.isUse()) + MOP.setIsKill(false); // Is there a def before NewIdx which is not OldIdx? LiveRange::iterator Next = std::next(OldIdxIn); diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp index c2a1cc7c6490..7a5cffca3470 100644 --- a/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBundle.h" @@ -42,28 +43,23 @@ void LivePhysRegs::removeRegsInMask(const MachineOperand &MO, /// Remove defined registers and regmask kills from the set. void LivePhysRegs::removeDefs(const MachineInstr &MI) { - for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (O->isReg()) { - if (!O->isDef() || O->isDebug()) - continue; - Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) - continue; - removeReg(Reg); - } else if (O->isRegMask()) - removeRegsInMask(*O); + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { + if (MOP.isRegMask()) { + removeRegsInMask(MOP); + continue; + } + + if (MOP.isDef()) + removeReg(MOP.getReg()); } } /// Add uses to the set. void LivePhysRegs::addUses(const MachineInstr &MI) { - for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (!O->isReg() || !O->readsReg() || O->isDebug()) - continue; - Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { + if (!MOP.isReg() || !MOP.readsReg()) continue; - addReg(Reg); + addReg(MOP.getReg()); } } @@ -116,7 +112,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI, } } -/// Prin the currently live registers to OS. +/// Print the currently live registers to OS. 
void LivePhysRegs::print(raw_ostream &OS) const { OS << "Live Registers:"; if (!TRI) { diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp index cbf112ee2bd5..2ebc8d7576d1 100644 --- a/llvm/lib/CodeGen/LiveRangeShrink.cpp +++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp index 72c79e5f8a75..08f046420fa1 100644 --- a/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" diff --git a/llvm/lib/CodeGen/LiveRegUnits.cpp b/llvm/lib/CodeGen/LiveRegUnits.cpp index 97763def1f40..b2731aa0e7db 100644 --- a/llvm/lib/CodeGen/LiveRegUnits.cpp +++ b/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -43,41 +43,34 @@ void LiveRegUnits::addRegsInMask(const uint32_t *RegMask) { void LiveRegUnits::stepBackward(const MachineInstr &MI) { // Remove defined registers and regmask kills from the set. - for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (O->isReg()) { - if (!O->isDef() || O->isDebug()) - continue; - Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) - continue; - removeReg(Reg); - } else if (O->isRegMask()) - removeRegsNotPreserved(O->getRegMask()); + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { + if (MOP.isRegMask()) { + removeRegsNotPreserved(MOP.getRegMask()); + continue; + } + + if (MOP.isDef()) + removeReg(MOP.getReg()); } // Add uses to the set. - for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (!O->isReg() || !O->readsReg() || O->isDebug()) - continue; - Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { + if (!MOP.isReg() || !MOP.readsReg()) continue; - addReg(Reg); + addReg(MOP.getReg()); } } void LiveRegUnits::accumulate(const MachineInstr &MI) { // Add defs, uses and regmask clobbers to the set. 
- for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { - if (O->isReg()) { - Register Reg = O->getReg(); - if (!Register::isPhysicalRegister(Reg)) - continue; - if (!O->isDef() && !O->readsReg()) - continue; - addReg(Reg); - } else if (O->isRegMask()) - addRegsInMask(O->getRegMask()); + for (const MachineOperand &MOP : phys_regs_and_masks(MI)) { + if (MOP.isRegMask()) { + addRegsInMask(MOP.getRegMask()); + continue; + } + if (!MOP.isDef() && !MOP.readsReg()) + continue; + addReg(MOP.getReg()); } } diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index 2392d4d00b56..5022726dc70a 100644 --- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp index ca0daa14fedf..40dfa696a2b9 100644 --- a/llvm/lib/CodeGen/LowLevelType.cpp +++ b/llvm/lib/CodeGen/LowLevelType.cpp @@ -24,14 +24,37 @@ LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) { if (NumElements == 1) return ScalarTy; return LLT::vector(NumElements, ScalarTy); - } else if (auto PTy = dyn_cast<PointerType>(&Ty)) { - return LLT::pointer(PTy->getAddressSpace(), DL.getTypeSizeInBits(&Ty)); - } else if (Ty.isSized()) { + } + + if (auto PTy = dyn_cast<PointerType>(&Ty)) { + unsigned AddrSpace = PTy->getAddressSpace(); + return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); + } + + if (Ty.isSized()) { // Aggregates are no different from real scalars as far as GlobalISel is // concerned. 
auto SizeInBits = DL.getTypeSizeInBits(&Ty); assert(SizeInBits != 0 && "invalid zero-sized type"); return LLT::scalar(SizeInBits); } + return LLT(); } + +MVT llvm::getMVTForLLT(LLT Ty) { + if (!Ty.isVector()) + return MVT::getIntegerVT(Ty.getSizeInBits()); + + return MVT::getVectorVT( + MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), + Ty.getNumElements()); +} + +LLT llvm::getLLTForMVT(MVT Ty) { + if (!Ty.isVector()) + return LLT::scalar(Ty.getSizeInBits()); + + return LLT::vector(Ty.getVectorNumElements(), + Ty.getVectorElementType().getSizeInBits()); +} diff --git a/llvm/lib/CodeGen/LowerEmuTLS.cpp b/llvm/lib/CodeGen/LowerEmuTLS.cpp index ed48365b0102..529d478756d4 100644 --- a/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp index c9bb5461aa3c..5ef907b88315 100644 --- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp +++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -48,10 +49,6 @@ static cl::opt<unsigned> cl::value_desc("N"), cl::desc("Function number to canonicalize.")); -static cl::opt<unsigned> CanonicalizeBasicBlockNumber( - "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), - cl::desc("BasicBlock number to canonicalize.")); - namespace { class MIRCanonicalizer : public MachineFunctionPass { @@ -373,34 +370,14 @@ static bool doDefKillClear(MachineBasicBlock *MBB) { } static bool runOnBasicBlock(MachineBasicBlock *MBB, - std::vector<StringRef> &bbNames, - unsigned &basicBlockNum, NamedVRegCursor &NVC) { - - if (CanonicalizeBasicBlockNumber != ~0U) { - if (CanonicalizeBasicBlockNumber != basicBlockNum++) - return false; - LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName() - << "\n";); - } - - if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) { - LLVM_DEBUG({ - dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName() - << "\n"; - }); - return false; - } - + unsigned BasicBlockNum, VRegRenamer &Renamer) { LLVM_DEBUG({ dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n"; dbgs() << "\n\n================================================\n\n"; }); bool Changed = false; - MachineFunction &MF = *MBB->getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - bbNames.push_back(MBB->getName()); LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";); LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n"; @@ -413,32 +390,10 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB, Changed |= rescheduleCanonically(IdempotentInstCount, MBB); LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump();); - Changed |= NVC.renameVRegs(MBB); - - // Here we renumber the def vregs for the idempotent instructions from the top - // of the MachineBasicBlock so that they are named in the order that we sorted - // them alphabetically. Eventually we wont need SkipVRegs because we will use - // named vregs instead. 
- if (IdempotentInstCount) - NVC.skipVRegs(); - - auto MII = MBB->begin(); - for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) { - MachineInstr &MI = *MII++; - Changed = true; - Register vRegToRename = MI.getOperand(0).getReg(); - auto Rename = NVC.createVirtualRegister(vRegToRename); - - std::vector<MachineOperand *> RenameMOs; - for (auto &MO : MRI.reg_operands(vRegToRename)) { - RenameMOs.push_back(&MO); - } - - for (auto *MO : RenameMOs) { - MO->setReg(Rename); - } - } + Changed |= Renamer.renameVRegs(MBB, BasicBlockNum); + // TODO: Consider dropping this. Dropping kill defs is probably not + // semantically sound. Changed |= doDefKillClear(MBB); LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump(); @@ -470,16 +425,12 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) { : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs() << "\n\n================================================\n\n";); - std::vector<StringRef> BBNames; - unsigned BBNum = 0; - bool Changed = false; - MachineRegisterInfo &MRI = MF.getRegInfo(); - NamedVRegCursor NVC(MRI); + VRegRenamer Renamer(MRI); for (auto MBB : RPOList) - Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC); + Changed |= runOnBasicBlock(MBB, BBNum++, Renamer); return Changed; } diff --git a/llvm/lib/CodeGen/MIRNamerPass.cpp b/llvm/lib/CodeGen/MIRNamerPass.cpp index 9d719f3917ce..9f61dd9ef243 100644 --- a/llvm/lib/CodeGen/MIRNamerPass.cpp +++ b/llvm/lib/CodeGen/MIRNamerPass.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -54,11 +55,12 @@ public: if (MF.empty()) return Changed; - NamedVRegCursor NVC(MF.getRegInfo()); + VRegRenamer Renamer(MF.getRegInfo()); + unsigned BBIndex = 0; ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); for (auto &MBB : RPOT) - Changed |= NVC.renameVRegs(MBB); + Changed |= Renamer.renameVRegs(MBB, BBIndex++); return Changed; } diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index ad5c617623f2..5976f5da1569 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -204,7 +204,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("nuw" , MIToken::kw_nuw) .Case("nsw" , MIToken::kw_nsw) .Case("exact" , MIToken::kw_exact) - .Case("fpexcept", MIToken::kw_fpexcept) + .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("debug-location", MIToken::kw_debug_location) .Case("same_value", MIToken::kw_cfi_same_value) .Case("offset", MIToken::kw_cfi_offset) @@ -242,6 +242,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("jump-table", MIToken::kw_jump_table) .Case("constant-pool", MIToken::kw_constant_pool) .Case("call-entry", MIToken::kw_call_entry) + .Case("custom", MIToken::kw_custom) .Case("liveout", MIToken::kw_liveout) .Case("address-taken", MIToken::kw_address_taken) .Case("landing-pad", MIToken::kw_landing_pad) @@ -252,6 +253,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("shufflemask", MIToken::kw_shufflemask) .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol) .Case("post-instr-symbol", MIToken::kw_post_instr_symbol) + .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker) .Case("unknown-size", MIToken::kw_unknown_size) .Default(MIToken::Identifier); } @@ -582,8 +584,8 @@ static MIToken::TokenKind getMetadataKeywordKind(StringRef 
Identifier) { .Default(MIToken::Error); } -static Cursor maybeLexExlaim(Cursor C, MIToken &Token, - ErrorCallbackType ErrorCallback) { +static Cursor maybeLexExclaim(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { if (C.peek() != '!') return None; auto Range = C; @@ -719,7 +721,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return R.remaining(); if (Cursor R = maybeLexNumericalLiteral(C, Token)) return R.remaining(); - if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) + if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback)) return R.remaining(); if (Cursor R = maybeLexSymbol(C, Token)) return R.remaining(); diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 200f9d026cc8..aaffe4a4c91b 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -73,7 +73,7 @@ struct MIToken { kw_nuw, kw_nsw, kw_exact, - kw_fpexcept, + kw_nofpexcept, kw_debug_location, kw_cfi_same_value, kw_cfi_offset, @@ -110,6 +110,7 @@ struct MIToken { kw_jump_table, kw_constant_pool, kw_call_entry, + kw_custom, kw_liveout, kw_address_taken, kw_landing_pad, @@ -120,6 +121,7 @@ struct MIToken { kw_shufflemask, kw_pre_instr_symbol, kw_post_instr_symbol, + kw_heap_alloc_marker, kw_unknown_size, // Named metadata keywords diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 6498acc9fa51..076ca943788b 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -28,6 +28,7 @@ #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -343,6 +344,37 @@ VRegInfo &PerFunctionMIParsingState::getVRegInfoNamed(StringRef RegName) { return *I.first->second; } +static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, + DenseMap<unsigned, const Value *> &Slots2Values) { + int Slot = MST.getLocalSlot(V); + if (Slot == -1) + return; + Slots2Values.insert(std::make_pair(unsigned(Slot), V)); +} + +/// Creates the mapping from slot numbers to function's unnamed IR values. +static void initSlots2Values(const Function &F, + DenseMap<unsigned, const Value *> &Slots2Values) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (const auto &Arg : F.args()) + mapValueToSlot(&Arg, MST, Slots2Values); + for (const auto &BB : F) { + mapValueToSlot(&BB, MST, Slots2Values); + for (const auto &I : BB) + mapValueToSlot(&I, MST, Slots2Values); + } +} + +const Value* PerFunctionMIParsingState::getIRValue(unsigned Slot) { + if (Slots2Values.empty()) + initSlots2Values(MF.getFunction(), Slots2Values); + auto ValueInfo = Slots2Values.find(Slot); + if (ValueInfo == Slots2Values.end()) + return nullptr; + return ValueInfo->second; +} + namespace { /// A wrapper struct around the 'MachineOperand' struct that includes a source @@ -370,8 +402,6 @@ class MIParser { PerFunctionMIParsingState &PFS; /// Maps from slot numbers to function's unnamed basic blocks. DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks; - /// Maps from slot numbers to function's unnamed values. 
- DenseMap<unsigned, const Value *> Slots2Values; public: MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, @@ -455,9 +485,12 @@ public: bool parseTargetIndexOperand(MachineOperand &Dest); bool parseCustomRegisterMaskOperand(MachineOperand &Dest); bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); - bool parseMachineOperand(MachineOperand &Dest, + bool parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, + MachineOperand &Dest, Optional<unsigned> &TiedDefIdx); - bool parseMachineOperandAndTargetFlags(MachineOperand &Dest, + bool parseMachineOperandAndTargetFlags(const unsigned OpCode, + const unsigned OpIdx, + MachineOperand &Dest, Optional<unsigned> &TiedDefIdx); bool parseOffset(int64_t &Offset); bool parseAlignment(unsigned &Alignment); @@ -471,6 +504,10 @@ public: bool parseOptionalAtomicOrdering(AtomicOrdering &Order); bool parseMachineMemoryOperand(MachineMemOperand *&Dest); bool parsePreOrPostInstrSymbol(MCSymbol *&Symbol); + bool parseHeapAllocMarker(MDNode *&Node); + + bool parseTargetImmMnemonic(const unsigned OpCode, const unsigned OpIdx, + MachineOperand &Dest, const MIRFormatter &MF); private: /// Convert the integer literal in the current token into an unsigned integer. @@ -508,8 +545,6 @@ private: const BasicBlock *getIRBlock(unsigned Slot); const BasicBlock *getIRBlock(unsigned Slot, const Function &F); - const Value *getIRValue(unsigned Slot); - /// Get or create an MCSymbol for a given name. MCSymbol *getOrCreateMCSymbol(StringRef Name); @@ -550,6 +585,9 @@ bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { return true; } +typedef function_ref<bool(StringRef::iterator Loc, const Twine &)> + ErrorCallbackType; + static const char *toString(MIToken::TokenKind TokenKind) { switch (TokenKind) { case MIToken::comma: @@ -906,11 +944,12 @@ bool MIParser::parse(MachineInstr *&MI) { // Parse the remaining machine operands. 
while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_pre_instr_symbol) && Token.isNot(MIToken::kw_post_instr_symbol) && + Token.isNot(MIToken::kw_heap_alloc_marker) && Token.isNot(MIToken::kw_debug_location) && Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { auto Loc = Token.location(); Optional<unsigned> TiedDefIdx; - if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx)) + if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx)) return true; if (OpCode == TargetOpcode::DBG_VALUE && MO.isReg()) MO.setIsDebug(); @@ -932,6 +971,10 @@ bool MIParser::parse(MachineInstr *&MI) { if (Token.is(MIToken::kw_post_instr_symbol)) if (parsePreOrPostInstrSymbol(PostInstrSymbol)) return true; + MDNode *HeapAllocMarker = nullptr; + if (Token.is(MIToken::kw_heap_alloc_marker)) + if (parseHeapAllocMarker(HeapAllocMarker)) + return true; DebugLoc DebugLocation; if (Token.is(MIToken::kw_debug_location)) { @@ -985,6 +1028,8 @@ bool MIParser::parse(MachineInstr *&MI) { MI->setPreInstrSymbol(MF, PreInstrSymbol); if (PostInstrSymbol) MI->setPostInstrSymbol(MF, PostInstrSymbol); + if (HeapAllocMarker) + MI->setHeapAllocMarker(MF, HeapAllocMarker); if (!MemOperands.empty()) MI->setMemRefs(MF, MemOperands); return false; @@ -1138,7 +1183,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_nuw) || Token.is(MIToken::kw_nsw) || Token.is(MIToken::kw_exact) || - Token.is(MIToken::kw_fpexcept)) { + Token.is(MIToken::kw_nofpexcept)) { // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) Flags |= MachineInstr::FrameSetup; @@ -1164,8 +1209,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::NoSWrap; if (Token.is(MIToken::kw_exact)) Flags |= MachineInstr::IsExact; - if (Token.is(MIToken::kw_fpexcept)) - Flags |= MachineInstr::FPExcept; + if (Token.is(MIToken::kw_nofpexcept)) + Flags |= MachineInstr::NoFPExcept; lex(); } @@ -1485,17 +1530,61 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) { return false; } -bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, - const Constant *&C) { +bool MIParser::parseTargetImmMnemonic(const unsigned OpCode, + const unsigned OpIdx, + MachineOperand &Dest, + const MIRFormatter &MF) { + assert(Token.is(MIToken::dot)); + auto Loc = Token.location(); // record start position + size_t Len = 1; // for "." + lex(); + + // Handle the case that mnemonic starts with number. + if (Token.is(MIToken::IntegerLiteral)) { + Len += Token.range().size(); + lex(); + } + + StringRef Src; + if (Token.is(MIToken::comma)) + Src = StringRef(Loc, Len); + else { + assert(Token.is(MIToken::Identifier)); + Src = StringRef(Loc, Len + Token.stringValue().size()); + } + int64_t Val; + if (MF.parseImmMnemonic(OpCode, OpIdx, Src, Val, + [this](StringRef::iterator Loc, const Twine &Msg) + -> bool { return error(Loc, Msg); })) + return true; + + Dest = MachineOperand::CreateImm(Val); + if (!Token.is(MIToken::comma)) + lex(); + return false; +} + +static bool parseIRConstant(StringRef::iterator Loc, StringRef StringValue, + PerFunctionMIParsingState &PFS, const Constant *&C, + ErrorCallbackType ErrCB) { auto Source = StringValue.str(); // The source has to be null terminated. 
SMDiagnostic Err; - C = parseConstantValue(Source, Err, *MF.getFunction().getParent(), + C = parseConstantValue(Source, Err, *PFS.MF.getFunction().getParent(), &PFS.IRSlots); if (!C) - return error(Loc + Err.getColumnNo(), Err.getMessage()); + return ErrCB(Loc + Err.getColumnNo(), Err.getMessage()); return false; } +bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, + const Constant *&C) { + return ::parseIRConstant( + Loc, StringValue, PFS, C, + [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C)) return true; @@ -1628,27 +1717,52 @@ bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) { return false; } -bool MIParser::getUnsigned(unsigned &Result) { +static bool getHexUint(const MIToken &Token, APInt &Result) { + assert(Token.is(MIToken::HexLiteral)); + StringRef S = Token.range(); + assert(S[0] == '0' && tolower(S[1]) == 'x'); + // This could be a floating point literal with a special prefix. + if (!isxdigit(S[2])) + return true; + StringRef V = S.substr(2); + APInt A(V.size()*4, V, 16); + + // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make + // sure it isn't the case before constructing result. + unsigned NumBits = (A == 0) ? 32 : A.getActiveBits(); + Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords())); + return false; +} + +static bool getUnsigned(const MIToken &Token, unsigned &Result, + ErrorCallbackType ErrCB) { if (Token.hasIntegerValue()) { const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); if (Val64 == Limit) - return error("expected 32-bit integer (too large)"); + return ErrCB(Token.location(), "expected 32-bit integer (too large)"); Result = Val64; return false; } if (Token.is(MIToken::HexLiteral)) { APInt A; - if (getHexUint(A)) + if (getHexUint(Token, A)) return true; if (A.getBitWidth() > 32) - return error("expected 32-bit integer (too large)"); + return ErrCB(Token.location(), "expected 32-bit integer (too large)"); Result = A.getZExtValue(); return false; } return true; } +bool MIParser::getUnsigned(unsigned &Result) { + return ::getUnsigned( + Token, Result, [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { assert(Token.is(MIToken::MachineBasicBlock) || Token.is(MIToken::MachineBasicBlockLabel)); @@ -1728,23 +1842,25 @@ bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) { return false; } -bool MIParser::parseGlobalValue(GlobalValue *&GV) { +static bool parseGlobalValue(const MIToken &Token, + PerFunctionMIParsingState &PFS, GlobalValue *&GV, + ErrorCallbackType ErrCB) { switch (Token.kind()) { case MIToken::NamedGlobalValue: { - const Module *M = MF.getFunction().getParent(); + const Module *M = PFS.MF.getFunction().getParent(); GV = M->getNamedValue(Token.stringValue()); if (!GV) - return error(Twine("use of undefined global value '") + Token.range() + - "'"); + return ErrCB(Token.location(), Twine("use of undefined global value '") + + Token.range() + "'"); break; } case MIToken::GlobalValue: { unsigned GVIdx; - if (getUnsigned(GVIdx)) + if (getUnsigned(Token, GVIdx, ErrCB)) return true; if (GVIdx >= PFS.IRSlots.GlobalValues.size()) - return error(Twine("use of undefined global value 
'@") + Twine(GVIdx) + - "'"); + return ErrCB(Token.location(), Twine("use of undefined global value '@") + + Twine(GVIdx) + "'"); GV = PFS.IRSlots.GlobalValues[GVIdx]; break; } @@ -1754,6 +1870,14 @@ bool MIParser::parseGlobalValue(GlobalValue *&GV) { return false; } +bool MIParser::parseGlobalValue(GlobalValue *&GV) { + return ::parseGlobalValue( + Token, PFS, GV, + [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { GlobalValue *GV = nullptr; if (parseGlobalValue(GV)) @@ -2295,23 +2419,13 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { if (expectAndConsume(MIToken::lparen)) return error("expected syntax shufflemask(<integer or undef>, ...)"); - SmallVector<Constant *, 32> ShufMask; - LLVMContext &Ctx = MF.getFunction().getContext(); - Type *I32Ty = Type::getInt32Ty(Ctx); - - bool AllZero = true; - bool AllUndef = true; - + SmallVector<int, 32> ShufMask; do { if (Token.is(MIToken::kw_undef)) { - ShufMask.push_back(UndefValue::get(I32Ty)); - AllZero = false; + ShufMask.push_back(-1); } else if (Token.is(MIToken::IntegerLiteral)) { - AllUndef = false; const APSInt &Int = Token.integerValue(); - if (!Int.isNullValue()) - AllZero = false; - ShufMask.push_back(ConstantInt::get(I32Ty, Int.getExtValue())); + ShufMask.push_back(Int.getExtValue()); } else return error("expected integer constant"); @@ -2321,13 +2435,8 @@ bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) { if (expectAndConsume(MIToken::rparen)) return error("shufflemask should be terminated by ')'."); - if (AllZero || AllUndef) { - VectorType *VT = VectorType::get(I32Ty, ShufMask.size()); - Constant *C = AllZero ? Constant::getNullValue(VT) : UndefValue::get(VT); - Dest = MachineOperand::CreateShuffleMask(C); - } else - Dest = MachineOperand::CreateShuffleMask(ConstantVector::get(ShufMask)); - + ArrayRef<int> MaskAlloc = MF.allocateShuffleMask(ShufMask); + Dest = MachineOperand::CreateShuffleMask(MaskAlloc); return false; } @@ -2402,7 +2511,8 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { return false; } -bool MIParser::parseMachineOperand(MachineOperand &Dest, +bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx, + MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) { switch (Token.kind()) { case MIToken::kw_implicit: @@ -2491,6 +2601,13 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, return parseCustomRegisterMaskOperand(Dest); } else return parseTypedImmediateOperand(Dest); + case MIToken::dot: { + const auto *TII = MF.getSubtarget().getInstrInfo(); + if (const auto *Formatter = TII->getMIRFormatter()) { + return parseTargetImmMnemonic(OpCode, OpIdx, Dest, *Formatter); + } + LLVM_FALLTHROUGH; + } default: // FIXME: Parse the MCSymbol machine operand. 
return error("expected a machine operand"); @@ -2499,7 +2616,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest, } bool MIParser::parseMachineOperandAndTargetFlags( - MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) { + const unsigned OpCode, const unsigned OpIdx, MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx) { unsigned TF = 0; bool HasTargetFlags = false; if (Token.is(MIToken::kw_target_flags)) { @@ -2531,7 +2649,7 @@ bool MIParser::parseMachineOperandAndTargetFlags( return true; } auto Loc = Token.location(); - if (parseMachineOperand(Dest, TiedDefIdx)) + if (parseMachineOperand(OpCode, OpIdx, Dest, TiedDefIdx)) return true; if (!HasTargetFlags) return false; @@ -2592,30 +2710,31 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) { return false; } -bool MIParser::parseIRValue(const Value *&V) { +static bool parseIRValue(const MIToken &Token, PerFunctionMIParsingState &PFS, + const Value *&V, ErrorCallbackType ErrCB) { switch (Token.kind()) { case MIToken::NamedIRValue: { - V = MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue()); + V = PFS.MF.getFunction().getValueSymbolTable()->lookup(Token.stringValue()); break; } case MIToken::IRValue: { unsigned SlotNumber = 0; - if (getUnsigned(SlotNumber)) + if (getUnsigned(Token, SlotNumber, ErrCB)) return true; - V = getIRValue(SlotNumber); + V = PFS.getIRValue(SlotNumber); break; } case MIToken::NamedGlobalValue: case MIToken::GlobalValue: { GlobalValue *GV = nullptr; - if (parseGlobalValue(GV)) + if (parseGlobalValue(Token, PFS, GV, ErrCB)) return true; V = GV; break; } case MIToken::QuotedIRValue: { const Constant *C = nullptr; - if (parseIRConstant(Token.location(), Token.stringValue(), C)) + if (parseIRConstant(Token.location(), Token.stringValue(), PFS, C, ErrCB)) return true; V = C; break; @@ -2624,10 +2743,17 @@ bool MIParser::parseIRValue(const Value *&V) { llvm_unreachable("The current token should be an IR block reference"); } if (!V) - return error(Twine("use of undefined IR value '") + Token.range() + "'"); + return ErrCB(Token.location(), Twine("use of undefined IR value '") + Token.range() + "'"); return false; } +bool MIParser::parseIRValue(const Value *&V) { + return ::parseIRValue( + Token, PFS, V, [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + }); +} + bool MIParser::getUint64(uint64_t &Result) { if (Token.hasIntegerValue()) { if (Token.integerValue().getActiveBits() > 64) @@ -2648,20 +2774,7 @@ bool MIParser::getUint64(uint64_t &Result) { } bool MIParser::getHexUint(APInt &Result) { - assert(Token.is(MIToken::HexLiteral)); - StringRef S = Token.range(); - assert(S[0] == '0' && tolower(S[1]) == 'x'); - // This could be a floating point literal with a special prefix. - if (!isxdigit(S[2])) - return true; - StringRef V = S.substr(2); - APInt A(V.size()*4, V, 16); - - // If A is 0, then A.getActiveBits() is 0. This isn't a valid bitwidth. Make - // sure it isn't the case before constructing result. - unsigned NumBits = (A == 0) ? 
32 : A.getActiveBits(); - Result = APInt(NumBits, ArrayRef<uint64_t>(A.getRawData(), A.getNumWords())); - return false; + return ::getHexUint(Token, Result); } bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { @@ -2748,6 +2861,20 @@ bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { "expected a global value or an external symbol after 'call-entry'"); } break; + case MIToken::kw_custom: { + lex(); + const auto *TII = MF.getSubtarget().getInstrInfo(); + if (const auto *Formatter = TII->getMIRFormatter()) { + if (Formatter->parseCustomPseudoSourceValue( + Token.stringValue(), MF, PFS, PSV, + [this](StringRef::iterator Loc, const Twine &Msg) -> bool { + return error(Loc, Msg); + })) + return true; + } else + return error("unable to parse target custom pseudo source value"); + break; + } default: llvm_unreachable("The current token should be pseudo source value"); } @@ -2759,7 +2886,7 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) || Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) || Token.is(MIToken::FixedStackObject) || Token.is(MIToken::StackObject) || - Token.is(MIToken::kw_call_entry)) { + Token.is(MIToken::kw_call_entry) || Token.is(MIToken::kw_custom)) { const PseudoSourceValue *PSV = nullptr; if (parseMemoryPseudoSourceValue(PSV)) return true; @@ -2956,6 +3083,22 @@ bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) { return false; } +bool MIParser::parseHeapAllocMarker(MDNode *&Node) { + assert(Token.is(MIToken::kw_heap_alloc_marker) && + "Invalid token for a heap alloc marker!"); + lex(); + parseMDNode(Node); + if (!Node) + return error("expected a MDNode after 'heap-alloc-marker'"); + if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || + Token.is(MIToken::lbrace)) + return false; + if (Token.isNot(MIToken::comma)) + return error("expected ',' before the next machine operand"); + lex(); + return false; +} + static void initSlots2BasicBlocks( const Function &F, DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { @@ -2994,37 +3137,6 @@ const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) { return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks); } -static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, - DenseMap<unsigned, const Value *> &Slots2Values) { - int Slot = MST.getLocalSlot(V); - if (Slot == -1) - return; - Slots2Values.insert(std::make_pair(unsigned(Slot), V)); -} - -/// Creates the mapping from slot numbers to function's unnamed IR values. -static void initSlots2Values(const Function &F, - DenseMap<unsigned, const Value *> &Slots2Values) { - ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); - MST.incorporateFunction(F); - for (const auto &Arg : F.args()) - mapValueToSlot(&Arg, MST, Slots2Values); - for (const auto &BB : F) { - mapValueToSlot(&BB, MST, Slots2Values); - for (const auto &I : BB) - mapValueToSlot(&I, MST, Slots2Values); - } -} - -const Value *MIParser::getIRValue(unsigned Slot) { - if (Slots2Values.empty()) - initSlots2Values(MF.getFunction(), Slots2Values); - auto ValueInfo = Slots2Values.find(Slot); - if (ValueInfo == Slots2Values.end()) - return nullptr; - return ValueInfo->second; -} - MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) { // FIXME: Currently we can't recognize temporary or local symbols and call all // of the appropriate forms to create them. 
However, this handles basic cases @@ -3087,3 +3199,15 @@ bool llvm::parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src, SMDiagnostic &Error) { return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node); } + +bool MIRFormatter::parseIRValue(StringRef Src, MachineFunction &MF, + PerFunctionMIParsingState &PFS, const Value *&V, + ErrorCallbackType ErrorCallback) { + MIToken Token; + Src = lexMIToken(Src, Token, [&](StringRef::iterator Loc, const Twine &Msg) { + ErrorCallback(Loc, Msg); + }); + V = nullptr; + + return ::parseIRValue(Token, PFS, V, ErrorCallback); +} diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index 55fac93d8991..10157c746b46 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -64,9 +64,12 @@ class MIRParserImpl { /// parts. bool NoMIRDocuments = false; + std::function<void(Function &)> ProcessIRFunction; + public: - MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, - StringRef Filename, LLVMContext &Context); + MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, + LLVMContext &Context, + std::function<void(Function &)> ProcessIRFunction); void reportDiagnostic(const SMDiagnostic &Diag); @@ -92,6 +95,9 @@ public: /// Return null if an error occurred. std::unique_ptr<Module> parseIRModule(); + /// Create an empty function with the given name. + Function *createDummyFunction(StringRef Name, Module &M); + bool parseMachineFunctions(Module &M, MachineModuleInfo &MMI); /// Parse the machine function in the current YAML document. @@ -163,13 +169,13 @@ static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { } MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, - StringRef Filename, LLVMContext &Context) + StringRef Filename, LLVMContext &Context, + std::function<void(Function &)> Callback) : SM(), - In(SM.getMemoryBuffer( - SM.AddNewSourceBuffer(std::move(Contents), SMLoc()))->getBuffer(), - nullptr, handleYAMLDiag, this), - Filename(Filename), - Context(Context) { + In(SM.getMemoryBuffer(SM.AddNewSourceBuffer(std::move(Contents), SMLoc())) + ->getBuffer(), + nullptr, handleYAMLDiag, this), + Filename(Filename), Context(Context), ProcessIRFunction(Callback) { In.setContext(&In); } @@ -256,14 +262,17 @@ bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { return false; } -/// Create an empty function with the given name. 
-static Function *createDummyFunction(StringRef Name, Module &M) { +Function *MIRParserImpl::createDummyFunction(StringRef Name, Module &M) { auto &Context = M.getContext(); Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Context), false), Function::ExternalLinkage, Name, M); BasicBlock *BB = BasicBlock::Create(Context, "entry", F); new UnreachableInst(Context, BB); + + if (ProcessIRFunction) + ProcessIRFunction(*F); + return F; } @@ -925,21 +934,23 @@ bool MIRParser::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) { return Impl->parseMachineFunctions(M, MMI); } -std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename, - SMDiagnostic &Error, - LLVMContext &Context) { +std::unique_ptr<MIRParser> llvm::createMIRParserFromFile( + StringRef Filename, SMDiagnostic &Error, LLVMContext &Context, + std::function<void(Function &)> ProcessIRFunction) { auto FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = FileOrErr.getError()) { Error = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + EC.message()); return nullptr; } - return createMIRParser(std::move(FileOrErr.get()), Context); + return createMIRParser(std::move(FileOrErr.get()), Context, + ProcessIRFunction); } std::unique_ptr<MIRParser> llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents, - LLVMContext &Context) { + LLVMContext &Context, + std::function<void(Function &)> ProcessIRFunction) { auto Filename = Contents->getBufferIdentifier(); if (Context.shouldDiscardValueNames()) { Context.diagnose(DiagnosticInfoMIRParser( @@ -949,6 +960,6 @@ llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents, "Can't read MIR with a Context that discards named Values"))); return nullptr; } - return std::make_unique<MIRParser>( - std::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context)); + return std::make_unique<MIRParser>(std::make_unique<MIRParserImpl>( + std::move(Contents), Filename, Context, ProcessIRFunction)); } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 1a4e21ac06a9..e8cd3d60ccb1 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -752,8 +752,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "nsw "; if (MI.getFlag(MachineInstr::IsExact)) OS << "exact "; - if (MI.getFlag(MachineInstr::FPExcept)) - OS << "fpexcept "; + if (MI.getFlag(MachineInstr::NoFPExcept)) + OS << "nofpexcept "; OS << TII->getName(MI.getOpcode()); if (I < E) @@ -784,6 +784,13 @@ void MIPrinter::print(const MachineInstr &MI) { MachineOperand::printSymbol(OS, *PostInstrSymbol); NeedComma = true; } + if (MDNode *HeapAllocMarker = MI.getHeapAllocMarker()) { + if (NeedComma) + OS << ','; + OS << " heap-alloc-marker "; + HeapAllocMarker->printAsOperand(OS, MST); + NeedComma = true; + } if (const DebugLoc &DL = MI.getDebugLoc()) { if (NeedComma) @@ -849,7 +856,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef()) TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx); const TargetIntrinsicInfo *TII = MI.getMF()->getTarget().getIntrinsicInfo(); - Op.print(OS, MST, TypeToPrint, PrintDef, /*IsStandalone=*/false, + Op.print(OS, MST, TypeToPrint, OpIdx, PrintDef, /*IsStandalone=*/false, ShouldPrintRegisterTies, TiedOperandIdx, TRI, TII); break; } @@ -867,6 +874,28 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, } } +void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V, 
+ ModuleSlotTracker &MST) { + if (isa<GlobalValue>(V)) { + V.printAsOperand(OS, /*PrintType=*/false, MST); + return; + } + if (isa<Constant>(V)) { + // Machine memory operands can load/store to/from constant value pointers. + OS << '`'; + V.printAsOperand(OS, /*PrintType=*/true, MST); + OS << '`'; + return; + } + OS << "%ir."; + if (V.hasName()) { + printLLVMNameWithoutPrefix(OS, V.getName()); + return; + } + int Slot = MST.getCurrentFunction() ? MST.getLocalSlot(&V) : -1; + MachineOperand::printIRSlotNumber(OS, Slot); +} + void llvm::printMIR(raw_ostream &OS, const Module &M) { yaml::Output Out(OS); Out << const_cast<Module &>(M); diff --git a/llvm/lib/CodeGen/MIRPrintingPass.cpp b/llvm/lib/CodeGen/MIRPrintingPass.cpp index e032fffd658c..1b5a9ade0871 100644 --- a/llvm/lib/CodeGen/MIRPrintingPass.cpp +++ b/llvm/lib/CodeGen/MIRPrintingPass.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MIRPrinter.h" - #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp index 6629000f468f..fcc40b26c527 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp @@ -13,336 +13,144 @@ using namespace llvm; #define DEBUG_TYPE "mir-vregnamer-utils" -namespace { +using VRegRenameMap = std::map<unsigned, unsigned>; -// TypedVReg and VRType are used to tell the renamer what to do at points in a -// sequence of values to be renamed. A TypedVReg can either contain -// an actual VReg, a FrameIndex, or it could just be a barrier for the next -// candidate (side-effecting instruction). This tells the renamer to increment -// to the next vreg name, or to skip modulo some skip-gap value. -enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate }; -class TypedVReg { - VRType Type; - Register Reg; - -public: - TypedVReg(Register Reg) : Type(RSE_Reg), Reg(Reg) {} - TypedVReg(VRType Type) : Type(Type), Reg(~0U) { - assert(Type != RSE_Reg && "Expected a non-Register Type."); - } - - bool isReg() const { return Type == RSE_Reg; } - bool isFrameIndex() const { return Type == RSE_FrameIndex; } - bool isCandidate() const { return Type == RSE_NewCandidate; } - - VRType getType() const { return Type; } - Register getReg() const { - assert(this->isReg() && "Expected a virtual or physical Register."); - return Reg; - } -}; - -/// Here we find our candidates. What makes an interesting candidate? -/// A candidate for a canonicalization tree root is normally any kind of -/// instruction that causes side effects such as a store to memory or a copy to -/// a physical register or a return instruction. We use these as an expression -/// tree root that we walk in order to build a canonical walk which should -/// result in canonical vreg renaming. 
-std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) { - std::vector<MachineInstr *> Candidates; - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - - for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - MachineInstr *MI = &*II; - - bool DoesMISideEffect = false; - - if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) { - const Register Dst = MI->getOperand(0).getReg(); - DoesMISideEffect |= !Register::isVirtualRegister(Dst); - - for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) { - if (DoesMISideEffect) - break; - DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent()); - } - } - - if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect) - continue; +bool VRegRenamer::doVRegRenaming(const VRegRenameMap &VRM) { + bool Changed = false; - LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump();); - Candidates.push_back(MI); + for (const auto &E : VRM) { + Changed = Changed || !MRI.reg_empty(E.first); + MRI.replaceRegWith(E.first, E.second); } - return Candidates; -} - -void doCandidateWalk(std::vector<TypedVReg> &VRegs, - std::queue<TypedVReg> &RegQueue, - std::vector<MachineInstr *> &VisitedMIs, - const MachineBasicBlock *MBB) { - - const MachineFunction &MF = *MBB->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - while (!RegQueue.empty()) { - - auto TReg = RegQueue.front(); - RegQueue.pop(); - - if (TReg.isFrameIndex()) { - LLVM_DEBUG(dbgs() << "Popping frame index.\n";); - VRegs.push_back(TypedVReg(RSE_FrameIndex)); - continue; - } - - assert(TReg.isReg() && "Expected vreg or physreg."); - Register Reg = TReg.getReg(); - - if (Register::isVirtualRegister(Reg)) { - LLVM_DEBUG({ - dbgs() << "Popping vreg "; - MRI.def_begin(Reg)->dump(); - dbgs() << "\n"; - }); - - if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) { - return TR.isReg() && TR.getReg() == Reg; - })) { - VRegs.push_back(TypedVReg(Reg)); - } - } else { - LLVM_DEBUG(dbgs() << "Popping physreg.\n";); - VRegs.push_back(TypedVReg(Reg)); - continue; - } - - for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) { - MachineInstr *Def = RI->getParent(); - - if (Def->getParent() != MBB) - continue; - - if (llvm::any_of(VisitedMIs, - [&](const MachineInstr *VMI) { return Def == VMI; })) { - break; - } - - LLVM_DEBUG({ - dbgs() << "\n========================\n"; - dbgs() << "Visited MI: "; - Def->dump(); - dbgs() << "BB Name: " << Def->getParent()->getName() << "\n"; - dbgs() << "\n========================\n"; - }); - VisitedMIs.push_back(Def); - for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) { - - MachineOperand &MO = Def->getOperand(I); - if (MO.isFI()) { - LLVM_DEBUG(dbgs() << "Pushing frame index.\n";); - RegQueue.push(TypedVReg(RSE_FrameIndex)); - } - - if (!MO.isReg()) - continue; - RegQueue.push(TypedVReg(MO.getReg())); - } - } - } + return Changed; } -std::map<unsigned, unsigned> -getVRegRenameMap(const std::vector<TypedVReg> &VRegs, - const std::vector<Register> &renamedInOtherBB, - MachineRegisterInfo &MRI, NamedVRegCursor &NVC) { - std::map<unsigned, unsigned> VRegRenameMap; - bool FirstCandidate = true; - - for (auto &vreg : VRegs) { - if (vreg.isFrameIndex()) { - // We skip one vreg for any frame index because there is a good chance - // (especially when comparing SelectionDAG to GlobalISel generated MIR) - // that in the other file we are just getting an incoming vreg that comes - // from a copy from a frame index. So it's safe to skip by one. 
- unsigned LastRenameReg = NVC.incrementVirtualVReg(); - (void)LastRenameReg; - LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";); - continue; - } else if (vreg.isCandidate()) { - - // After the first candidate, for every subsequent candidate, we skip mod - // 10 registers so that the candidates are more likely to start at the - // same vreg number making it more likely that the canonical walk from the - // candidate insruction. We don't need to skip from the first candidate of - // the BasicBlock because we already skip ahead several vregs for each BB. - unsigned LastRenameReg = NVC.getVirtualVReg(); - if (FirstCandidate) - NVC.incrementVirtualVReg(LastRenameReg % 10); - FirstCandidate = false; - continue; - } else if (!Register::isVirtualRegister(vreg.getReg())) { - unsigned LastRenameReg = NVC.incrementVirtualVReg(); - (void)LastRenameReg; - LLVM_DEBUG({ - dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n"; - }); - continue; - } - - auto Reg = vreg.getReg(); - if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) { - LLVM_DEBUG(dbgs() << "Vreg " << Reg - << " already renamed in other BB.\n";); - continue; - } +VRegRenameMap +VRegRenamer::getVRegRenameMap(const std::vector<NamedVReg> &VRegs) { - auto Rename = NVC.createVirtualRegister(Reg); + StringMap<unsigned> VRegNameCollisionMap; - if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) { - LLVM_DEBUG(dbgs() << "Mapping vreg ";); - if (MRI.reg_begin(Reg) != MRI.reg_end()) { - LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump();); - } else { - LLVM_DEBUG(dbgs() << Reg;); - } - LLVM_DEBUG(dbgs() << " to ";); - if (MRI.reg_begin(Rename) != MRI.reg_end()) { - LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump();); - } else { - LLVM_DEBUG(dbgs() << Rename;); - } - LLVM_DEBUG(dbgs() << "\n";); + auto GetUniqueVRegName = [&VRegNameCollisionMap](const NamedVReg &Reg) { + if (VRegNameCollisionMap.find(Reg.getName()) == VRegNameCollisionMap.end()) + VRegNameCollisionMap[Reg.getName()] = 0; + const unsigned Counter = ++VRegNameCollisionMap[Reg.getName()]; + return Reg.getName() + "__" + std::to_string(Counter); + }; - VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename)); - } + VRegRenameMap VRM; + for (const auto &VReg : VRegs) { + const unsigned Reg = VReg.getReg(); + VRM[Reg] = createVirtualRegisterWithLowerName(Reg, GetUniqueVRegName(VReg)); } - - return VRegRenameMap; + return VRM; } -bool doVRegRenaming(std::vector<Register> &renamedInOtherBB, - const std::map<unsigned, unsigned> &VRegRenameMap, - MachineRegisterInfo &MRI) { - bool Changed = false; - for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) { - - auto VReg = I->first; - auto Rename = I->second; - - renamedInOtherBB.push_back(Rename); - - std::vector<MachineOperand *> RenameMOs; - for (auto &MO : MRI.reg_operands(VReg)) { - RenameMOs.push_back(&MO); - } - - for (auto *MO : RenameMOs) { - Changed = true; - MO->setReg(Rename); +std::string VRegRenamer::getInstructionOpcodeHash(MachineInstr &MI) { + std::string S; + raw_string_ostream OS(S); - if (!MO->isDef()) - MO->setIsKill(false); + // Gets a hashable artifact from a given MachineOperand (ie an unsigned). 
+ auto GetHashableMO = [this](const MachineOperand &MO) -> unsigned { + switch (MO.getType()) { + case MachineOperand::MO_CImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getCImm()->getZExtValue()); + case MachineOperand::MO_FPImmediate: + return hash_combine( + MO.getType(), MO.getTargetFlags(), + MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); + case MachineOperand::MO_Register: + if (Register::isVirtualRegister(MO.getReg())) + return MRI.getVRegDef(MO.getReg())->getOpcode(); + return MO.getReg(); + case MachineOperand::MO_Immediate: + return MO.getImm(); + case MachineOperand::MO_TargetIndex: + return MO.getOffset() | (MO.getTargetFlags() << 16); + case MachineOperand::MO_FrameIndex: + return llvm::hash_value(MO); + + // We could explicitly handle all the types of the MachineOperand, + // here but we can just return a common number until we find a + // compelling test case where this is bad. The only side effect here + // is contributing to a hash collision but there's enough information + // (Opcodes,other registers etc) that this will likely not be a problem. + + // TODO: Handle the following Index/ID/Predicate cases. They can + // be hashed on in a stable manner. + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_CFIIndex: + case MachineOperand::MO_IntrinsicID: + case MachineOperand::MO_Predicate: + + // In the cases below we havn't found a way to produce an artifact that will + // result in a stable hash, in most cases because they are pointers. We want + // stable hashes because we want the hash to be the same run to run. + case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + case MachineOperand::MO_Metadata: + case MachineOperand::MO_MCSymbol: + case MachineOperand::MO_ShuffleMask: + return 0; } + llvm_unreachable("Unexpected MachineOperandType."); + }; + + SmallVector<unsigned, 16> MIOperands = {MI.getOpcode(), MI.getFlags()}; + llvm::transform(MI.uses(), std::back_inserter(MIOperands), GetHashableMO); + + for (const auto *Op : MI.memoperands()) { + MIOperands.push_back((unsigned)Op->getSize()); + MIOperands.push_back((unsigned)Op->getFlags()); + MIOperands.push_back((unsigned)Op->getOffset()); + MIOperands.push_back((unsigned)Op->getOrdering()); + MIOperands.push_back((unsigned)Op->getAddrSpace()); + MIOperands.push_back((unsigned)Op->getSyncScopeID()); + MIOperands.push_back((unsigned)Op->getBaseAlignment()); + MIOperands.push_back((unsigned)Op->getFailureOrdering()); } - return Changed; + auto HashMI = hash_combine_range(MIOperands.begin(), MIOperands.end()); + return std::to_string(HashMI).substr(0, 5); } -bool renameVRegs(MachineBasicBlock *MBB, - std::vector<Register> &renamedInOtherBB, - NamedVRegCursor &NVC) { - bool Changed = false; - MachineFunction &MF = *MBB->getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - std::vector<MachineInstr *> Candidates = populateCandidates(MBB); - std::vector<MachineInstr *> VisitedMIs; - llvm::copy(Candidates, std::back_inserter(VisitedMIs)); - - std::vector<TypedVReg> VRegs; - for (auto candidate : Candidates) { - VRegs.push_back(TypedVReg(RSE_NewCandidate)); - - std::queue<TypedVReg> RegQueue; - - // Here we walk the vreg operands of a non-root node along our walk. - // The root nodes are the original candidates (stores normally). 
- // These are normally not the root nodes (except for the case of copies to - // physical registers). - for (unsigned i = 1; i < candidate->getNumOperands(); i++) { - if (candidate->mayStore() || candidate->isBranch()) - break; - - MachineOperand &MO = candidate->getOperand(i); - if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg()))) - continue; - - LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";); - RegQueue.push(TypedVReg(MO.getReg())); - } - - // Here we walk the root candidates. We start from the 0th operand because - // the root is normally a store to a vreg. - for (unsigned i = 0; i < candidate->getNumOperands(); i++) { - - if (!candidate->mayStore() && !candidate->isBranch()) - break; - - MachineOperand &MO = candidate->getOperand(i); - - // TODO: Do we want to only add vregs here? - if (!MO.isReg() && !MO.isFI()) - continue; - - LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";); - - RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg()) - : TypedVReg(RSE_FrameIndex)); - } - - doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB); - } - - // If we have populated no vregs to rename then bail. - // The rest of this function does the vreg remaping. - if (VRegs.size() == 0) - return Changed; - - auto VRegRenameMap = getVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC); - Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI); - return Changed; +unsigned VRegRenamer::createVirtualRegister(unsigned VReg) { + assert(Register::isVirtualRegister(VReg) && "Expected Virtual Registers"); + std::string Name = getInstructionOpcodeHash(*MRI.getVRegDef(VReg)); + return createVirtualRegisterWithLowerName(VReg, Name); } -} // anonymous namespace -void NamedVRegCursor::skipVRegs() { - unsigned VRegGapIndex = 1; - if (!virtualVRegNumber) { - VRegGapIndex = 0; - virtualVRegNumber = MRI.createIncompleteVirtualRegister(); +bool VRegRenamer::renameInstsInMBB(MachineBasicBlock *MBB) { + std::vector<NamedVReg> VRegs; + std::string Prefix = "bb" + std::to_string(CurrentBBNumber) + "_"; + for (MachineInstr &Candidate : *MBB) { + // Don't rename stores/branches. + if (Candidate.mayStore() || Candidate.isBranch()) + continue; + if (!Candidate.getNumOperands()) + continue; + // Look for instructions that define VRegs in operand 0. + MachineOperand &MO = Candidate.getOperand(0); + // Avoid non regs, instructions defining physical regs. + if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg())) + continue; + VRegs.push_back( + NamedVReg(MO.getReg(), Prefix + getInstructionOpcodeHash(Candidate))); } - const unsigned VR_GAP = (++VRegGapIndex * SkipGapSize); - unsigned I = virtualVRegNumber; - const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP; - - virtualVRegNumber = E; -} - -unsigned NamedVRegCursor::createVirtualRegister(unsigned VReg) { - if (!virtualVRegNumber) - skipVRegs(); - std::string S; - raw_string_ostream OS(S); - OS << "namedVReg" << (virtualVRegNumber & ~0x80000000); - OS.flush(); - virtualVRegNumber++; - if (auto RC = MRI.getRegClassOrNull(VReg)) - return MRI.createVirtualRegister(RC, OS.str()); - return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str()); + return VRegs.size() ? 
doVRegRenaming(getVRegRenameMap(VRegs)) : false; } -bool NamedVRegCursor::renameVRegs(MachineBasicBlock *MBB) { - return ::renameVRegs(MBB, RenamedInOtherBB, *this); +unsigned VRegRenamer::createVirtualRegisterWithLowerName(unsigned VReg, + StringRef Name) { + std::string LowerName = Name.lower(); + const TargetRegisterClass *RC = MRI.getRegClassOrNull(VReg); + return RC ? MRI.createVirtualRegister(RC, LowerName) + : MRI.createGenericVirtualRegister(MRI.getType(VReg), LowerName); } diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.h b/llvm/lib/CodeGen/MIRVRegNamerUtils.h index c5b52a968538..0c0a71a13248 100644 --- a/llvm/lib/CodeGen/MIRVRegNamerUtils.h +++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.h @@ -25,65 +25,67 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Support/raw_ostream.h" -#include <queue> - namespace llvm { +/// VRegRenamer - This class is used for renaming vregs in a machine basic +/// block according to semantics of the instruction. +class VRegRenamer { + class NamedVReg { + Register Reg; + std::string Name; + + public: + NamedVReg(Register Reg, std::string Name = "") : Reg(Reg), Name(Name) {} + NamedVReg(std::string Name = "") : Reg(~0U), Name(Name) {} + + const std::string &getName() const { return Name; } + + Register getReg() const { return Reg; } + }; -/// NamedVRegCursor - The cursor is an object that keeps track of what the next -/// vreg name should be. It does book keeping to determine when to skip the -/// index value and by how much, or if the next vreg name should be an increment -/// from the previous. -class NamedVRegCursor { MachineRegisterInfo &MRI; - /// virtualVRegNumber - Book keeping of the last vreg position. - unsigned virtualVRegNumber; + unsigned CurrentBBNumber = 0; - /// SkipGapSize - Used to calculate a modulo amount to skip by after every - /// sequence of instructions starting from a given side-effecting - /// MachineInstruction for a given MachineBasicBlock. The general idea is that - /// for a given program compiled with two different opt pipelines, there - /// shouldn't be greater than SkipGapSize difference in how many vregs are in - /// play between the two and for every def-use graph of vregs we rename we - /// will round up to the next SkipGapSize'th number so that we have a high - /// change of landing on the same name for two given matching side-effects - /// for the two compilation outcomes. - const unsigned SkipGapSize; + /// Given an Instruction, construct a hash of the operands + /// of the instructions along with the opcode. + /// When dealing with virtual registers, just hash the opcode of + /// the instruction defining that vreg. + /// Handle immediates, registers (physical and virtual) explicitly, + /// and return a common value for the other cases. + /// Instruction will be named in the following scheme + /// bb<block_no>_hash_<collission_count>. + std::string getInstructionOpcodeHash(MachineInstr &MI); - /// RenamedInOtherBB - VRegs that we already renamed: ie breadcrumbs. - std::vector<Register> RenamedInOtherBB; + /// For all the VRegs that are candidates for renaming, + /// return a mapping from old vregs to new vregs with names. + std::map<unsigned, unsigned> + getVRegRenameMap(const std::vector<NamedVReg> &VRegs); -public: - NamedVRegCursor() = delete; - /// 1000 for the SkipGapSize was a good heuristic at the time of the writing - /// of the MIRCanonicalizerPass. Adjust as needed. 
- NamedVRegCursor(MachineRegisterInfo &MRI, unsigned SkipGapSize = 1000) - : MRI(MRI), virtualVRegNumber(0), SkipGapSize(SkipGapSize) {} - - /// SkipGapSize - Skips modulo a gap value of indices. Indices are used to - /// produce the next vreg name. - void skipVRegs(); - - unsigned getVirtualVReg() const { return virtualVRegNumber; } - - /// incrementVirtualVReg - This increments an index value that us used to - /// create a new vreg name. This is not a Register. - unsigned incrementVirtualVReg(unsigned incr = 1) { - virtualVRegNumber += incr; - return virtualVRegNumber; - } + /// Perform replacing of registers based on the <old,new> vreg map. + bool doVRegRenaming(const std::map<unsigned, unsigned> &VRegRenameMap); /// createVirtualRegister - Given an existing vreg, create a named vreg to - /// take its place. + /// take its place. The name is determined by calling + /// getInstructionOpcodeHash. unsigned createVirtualRegister(unsigned VReg); - /// renameVRegs - For a given MachineBasicBlock, scan for side-effecting - /// instructions, walk the def-use from each side-effecting root (in sorted - /// root order) and rename the encountered vregs in the def-use graph in a - /// canonical ordering. This method maintains book keeping for which vregs - /// were already renamed in RenamedInOtherBB. - // @return changed - bool renameVRegs(MachineBasicBlock *MBB); + /// Create a vreg with name and return it. + unsigned createVirtualRegisterWithLowerName(unsigned VReg, StringRef Name); + /// Linearly traverse the MachineBasicBlock and rename each instruction's + /// vreg definition based on the semantics of the instruction. + /// Names are as follows bb<BBNum>_hash_[0-9]+ + bool renameInstsInMBB(MachineBasicBlock *MBB); + +public: + VRegRenamer() = delete; + VRegRenamer(MachineRegisterInfo &MRI) : MRI(MRI) {} + + /// Same as the above, but sets a BBNum depending on BB traversal that + /// will be used as prefix for the vreg names. + bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum) { + CurrentBBNumber = BBNum; + return renameInstsInMBB(MBB); + } }; } // namespace llvm diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 854bef3aab05..f433c4b6c90b 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -1395,8 +1395,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, --N; - MachineOperandIteratorBase::PhysRegInfo Info = - ConstMIOperands(*I).analyzePhysReg(Reg, TRI); + PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI); // Register is live when we read it here. if (Info.Read) @@ -1434,8 +1433,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, --N; - MachineOperandIteratorBase::PhysRegInfo Info = - ConstMIOperands(*I).analyzePhysReg(Reg, TRI); + PhysRegInfo Info = AnalyzePhysRegInBundle(*I, Reg, TRI); // Defs happen after uses so they take precedence if both are present. @@ -1462,6 +1460,11 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, } while (I != begin() && N > 0); } + // If all the instructions before this in the block are debug instructions, + // skip over them. + while (I != begin() && std::prev(I)->isDebugInstr()) + --I; + // Did we get to the start of the block? if (I == begin()) { // If so, the register's state is definitely defined by the live-in state. 
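A note on the vreg naming scheme introduced in the MIRVRegNamerUtils changes above: VRegRenamer builds each name from a basic-block prefix, the first five digits of a hash over the defining instruction's opcode and operands, and a collision counter, then lowercases the result (bb<BBNum>_<hash>__<N>). The standalone C++ sketch below only illustrates how such a name is assembled; toyHash and the sample opcode values are placeholders I introduce for illustration, not the hash_combine-based getInstructionOpcodeHash from the patch.

#include <cctype>
#include <iostream>
#include <map>
#include <string>

// Placeholder for getInstructionOpcodeHash(): hash, stringify, keep 5 chars.
static std::string toyHash(unsigned Opcode) {
  return std::to_string(Opcode * 2654435761u).substr(0, 5);
}

// Assemble bb<BBNum>_<hash>__<counter>, lowercased, mirroring the shape of the
// names VRegRenamer hands to createVirtualRegisterWithLowerName.
static std::string makeVRegName(unsigned BBNum, unsigned Opcode,
                                std::map<std::string, unsigned> &Collisions) {
  std::string Base = "bb" + std::to_string(BBNum) + "_" + toyHash(Opcode);
  unsigned Counter = ++Collisions[Base]; // uniquify colliding hashes
  std::string Name = Base + "__" + std::to_string(Counter);
  for (char &C : Name)
    C = static_cast<char>(std::tolower(static_cast<unsigned char>(C)));
  return Name;
}

int main() {
  std::map<std::string, unsigned> Collisions;
  // Two defs whose opcodes hash identically in bb0 get suffixes __1 and __2.
  std::cout << makeVRegName(0, 13, Collisions) << "\n";
  std::cout << makeVRegName(0, 13, Collisions) << "\n";
  std::cout << makeVRegName(1, 42, Collisions) << "\n"; // different block/opcode
  return 0;
}

In the patch itself the same idea is driven by renameVRegs(MBB, BBNum), which sets the bb<BBNum>_ prefix per traversed block before hashing each defining instruction.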
diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 53a35b7e89c2..d8ea3e0b9cf6 100644 --- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/GraphWriter.h" @@ -172,6 +173,13 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo() initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); } +MachineBlockFrequencyInfo::MachineBlockFrequencyInfo( + MachineFunction &F, + MachineBranchProbabilityInfo &MBPI, + MachineLoopInfo &MLI) : MachineFunctionPass(ID) { + calculate(F, MBPI, MLI); +} + MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default; void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index ac19bc0bd8ea..30b98ec88c24 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -41,6 +42,7 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/TailDuplicator.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -48,6 +50,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BlockFrequency.h" @@ -362,6 +365,8 @@ class MachineBlockPlacement : public MachineFunctionPass { /// A handle to the post dominator tree. MachinePostDominatorTree *MPDT; + ProfileSummaryInfo *PSI; + /// Duplicator used to duplicate tails during placement. /// /// Placement decisions can open up new tail duplication opportunities, but @@ -537,6 +542,7 @@ public: if (TailDupPlacement) AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -554,6 +560,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) @@ -1073,6 +1080,11 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( if (!shouldTailDuplicate(Succ)) return false; + // The result of canTailDuplicate. + bool Duplicate = true; + // Number of possible duplication. + unsigned int NumDup = 0; + // For CFG checking. 
SmallPtrSet<const MachineBasicBlock *, 4> Successors(BB->succ_begin(), BB->succ_end()); @@ -1119,9 +1131,50 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( // to trellises created by tail-duplication, so we just look for the // CFG. continue; - return false; + Duplicate = false; + continue; } + NumDup++; } + + // No possible duplication in current filter set. + if (NumDup == 0) + return false; + + // This is mainly for function exit BB. + // The integrated tail duplication is really designed for increasing + // fallthrough from predecessors from Succ to its successors. We may need + // other machanism to handle different cases. + if (Succ->succ_size() == 0) + return true; + + // Plus the already placed predecessor. + NumDup++; + + // If the duplication candidate has more unplaced predecessors than + // successors, the extra duplication can't bring more fallthrough. + // + // Pred1 Pred2 Pred3 + // \ | / + // \ | / + // \ | / + // Dup + // / \ + // / \ + // Succ1 Succ2 + // + // In this example Dup has 2 successors and 3 predecessors, duplication of Dup + // can increase the fallthrough from Pred1 to Succ1 and from Pred2 to Succ2, + // but the duplication into Pred3 can't increase fallthrough. + // + // A small number of extra duplication may not hurt too much. We need a better + // heuristic to handle it. + // + // FIXME: we should selectively tail duplicate a BB into part of its + // predecessors. + if ((NumDup > Succ->succ_size()) || !Duplicate) + return false; + return true; } @@ -1417,9 +1470,10 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( bool BadCFGConflict = false; for (MachineBasicBlock *Pred : Succ->predecessors()) { - if (Pred == Succ || BlockToChain[Pred] == &SuccChain || + BlockChain *PredChain = BlockToChain[Pred]; + if (Pred == Succ || PredChain == &SuccChain || (BlockFilter && !BlockFilter->count(Pred)) || - BlockToChain[Pred] == &Chain || + PredChain == &Chain || Pred != *std::prev(PredChain->end()) || // This check is redundant except for look ahead. This function is // called for lookahead by isProfitableToTailDup when BB hasn't been // placed yet. @@ -1721,7 +1775,9 @@ void MachineBlockPlacement::buildChain( MachineBasicBlock* BestSucc = Result.BB; bool ShouldTailDup = Result.ShouldTailDup; if (allowTailDupPlacement()) - ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc)); + ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(BB, BestSucc, + Chain, + BlockFilter)); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at @@ -2025,7 +2081,10 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L, // i.e. when the layout predecessor does not fallthrough to the loop header. // In practice this never happens though: there always seems to be a preheader // that can fallthrough and that is also placed before the header. - if (F->getFunction().hasOptSize()) + bool OptForSize = F->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(L.getHeader(), PSI, + &MBFI->getMBFI()); + if (OptForSize) return L.getHeader(); MachineBasicBlock *OldTop = nullptr; @@ -2781,6 +2840,11 @@ void MachineBlockPlacement::alignBlocks() { if (Freq < (LoopHeaderFreq * ColdProb)) continue; + // If the global profiles indicates so, don't align it. 
+ if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) && + !TLI->alignLoopsWithOptSize()) + continue; + // Check for the existence of a non-layout predecessor which would benefit // from aligning this block. MachineBasicBlock *LayoutPred = @@ -2988,6 +3052,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TLI = MF.getSubtarget().getTargetLowering(); MPDT = nullptr; + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); // Initialize PreferredLoopExit to nullptr here since it may never be set if // there are no MachineLoops. @@ -3018,10 +3083,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (allowTailDupPlacement()) { MPDT = &getAnalysis<MachinePostDominatorTree>(); - if (MF.getFunction().hasOptSize()) + bool OptForSize = MF.getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI()); + if (OptForSize) TailDupSize = 1; bool PreRegAlloc = false; - TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize); + TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI, + /* LayoutMode */ true, TailDupSize); precomputeTriangleChains(); } @@ -3037,7 +3105,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (MF.size() > 3 && EnableTailMerge) { unsigned TailMergeSize = TailDupSize + 1; BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI, - *MBPI, TailMergeSize); + *MBPI, PSI, TailMergeSize); auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp index d2277ce51746..f1d68c79a212 100644 --- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -13,6 +13,8 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -37,6 +39,12 @@ cl::opt<unsigned> ProfileLikelyProb( char MachineBranchProbabilityInfo::ID = 0; +MachineBranchProbabilityInfo::MachineBranchProbabilityInfo() + : ImmutablePass(ID) { + PassRegistry &Registry = *PassRegistry::getPassRegistry(); + initializeMachineBranchProbabilityInfoPass(Registry); +} + void MachineBranchProbabilityInfo::anchor() {} BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp index d9bd32b2fbab..9561a06ce8df 100644 --- a/llvm/lib/CodeGen/MachineCSE.cpp +++ b/llvm/lib/CodeGen/MachineCSE.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index e9f462fd1b37..73895bdf834f 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -12,17 +12,21 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include 
"llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -66,6 +70,8 @@ class MachineCombiner : public MachineFunctionPass { MachineLoopInfo *MLI; // Current MachineLoopInfo MachineTraceMetrics *Traces; MachineTraceMetrics::Ensemble *MinInstr; + MachineBlockFrequencyInfo *MBFI; + ProfileSummaryInfo *PSI; TargetSchedModel TSchedModel; @@ -82,7 +88,7 @@ public: StringRef getPassName() const override { return "Machine InstCombiner"; } private: - bool doSubstitute(unsigned NewSize, unsigned OldSize); + bool doSubstitute(unsigned NewSize, unsigned OldSize, bool OptForSize); bool combineInstructions(MachineBasicBlock *); MachineInstr *getOperandDef(const MachineOperand &MO); unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -131,6 +137,8 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<MachineLoopInfo>(); AU.addRequired<MachineTraceMetrics>(); AU.addPreserved<MachineTraceMetrics>(); + AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -408,8 +416,9 @@ bool MachineCombiner::preservesResourceLen( /// \returns true when new instruction sequence should be generated /// independent if it lengthens critical path or not -bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { - if (OptSize && (NewSize < OldSize)) +bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize, + bool OptForSize) { + if (OptForSize && (NewSize < OldSize)) return true; if (!TSchedModel.hasInstrSchedModelOrItineraries()) return true; @@ -507,6 +516,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { SparseSet<LiveRegUnit> RegUnits; RegUnits.setUniverse(TRI->getNumRegUnits()); + bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI); + while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; SmallVector<MachineCombinerPattern, 16> Patterns; @@ -583,7 +594,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // fewer instructions OR // the new sequence neither lengthens the critical path nor increases // resource pressure. - if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount)) { + if (SubstituteAlways || + doSubstitute(NewInstCount, OldInstCount, OptForSize)) { insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, RegUnits, IncrementalUpdate); // Eagerly stop after the first pattern fires. @@ -638,6 +650,10 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); MLI = &getAnalysis<MachineLoopInfo>(); Traces = &getAnalysis<MachineTraceMetrics>(); + PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + MBFI = (PSI && PSI->hasProfileSummary()) ? 
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() : + nullptr; MinInstr = nullptr; OptSize = MF.getFunction().hasOptSize(); diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index ebe76e31dca9..c316b167059b 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -37,6 +37,15 @@ // ... // No clobber of %R0 // %R1 = COPY %R0 <<< Removed // +// or +// +// $R0 = OP ... +// ... // No read/clobber of $R0 and $R1 +// $R1 = COPY $R0 // $R0 is killed +// Replace $R0 with $R1 and remove the COPY +// $R1 = OP ... +// ... +// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" @@ -54,6 +63,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -68,6 +78,7 @@ using namespace llvm; STATISTIC(NumDeletes, "Number of dead copies deleted"); STATISTIC(NumCopyForwards, "Number of copy uses forwarded"); +STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated"); DEBUG_COUNTER(FwdCounter, "machine-cp-fwd", "Controls which register COPYs are forwarded"); @@ -97,6 +108,28 @@ public: } } + /// Remove register from copy maps. + void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) { + // Since Reg might be a subreg of some registers, only invalidating Reg is + // not enough. We have to find the COPY that defines Reg or registers + // defined by Reg and invalidate all of them. + DenseSet<unsigned> RegsToInvalidate{Reg}; + for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { + auto I = Copies.find(*RUI); + if (I != Copies.end()) { + if (MachineInstr *MI = I->second.MI) { + RegsToInvalidate.insert(MI->getOperand(0).getReg()); + RegsToInvalidate.insert(MI->getOperand(1).getReg()); + } + RegsToInvalidate.insert(I->second.DefRegs.begin(), + I->second.DefRegs.end()); + } + } + for (unsigned InvalidReg : RegsToInvalidate) + for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI) + Copies.erase(*RUI); + } + /// Clobber a single register, removing it from the tracker's copy maps. void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) { for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { @@ -150,6 +183,38 @@ public: return CI->second.MI; } + MachineInstr *findCopyDefViaUnit(unsigned RegUnit, + const TargetRegisterInfo &TRI) { + auto CI = Copies.find(RegUnit); + if (CI == Copies.end()) + return nullptr; + if (CI->second.DefRegs.size() != 1) + return nullptr; + MCRegUnitIterator RUI(CI->second.DefRegs[0], &TRI); + return findCopyForUnit(*RUI, TRI, true); + } + + MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg, + const TargetRegisterInfo &TRI) { + MCRegUnitIterator RUI(Reg, &TRI); + MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI); + if (!AvailCopy || + !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg)) + return nullptr; + + Register AvailSrc = AvailCopy->getOperand(1).getReg(); + Register AvailDef = AvailCopy->getOperand(0).getReg(); + for (const MachineInstr &MI : + make_range(AvailCopy->getReverseIterator(), I.getReverseIterator())) + for (const MachineOperand &MO : MI.operands()) + if (MO.isRegMask()) + // FIXME: Shall we simultaneously invalidate AvailSrc or AvailDef? 
+ if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef)) + return nullptr; + + return AvailCopy; + } + MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg, const TargetRegisterInfo &TRI) { // We check the first RegUnit here, since we'll only be interested in the @@ -210,11 +275,16 @@ private: void ClobberRegister(unsigned Reg); void ReadRegister(unsigned Reg, MachineInstr &Reader, DebugType DT); - void CopyPropagateBlock(MachineBasicBlock &MBB); + void ForwardCopyPropagateBlock(MachineBasicBlock &MBB); + void BackwardCopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); void forwardUses(MachineInstr &MI); + void propagateDefs(MachineInstr &MI); bool isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx); + bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy, + const MachineInstr &UseI, + unsigned UseIdx); bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use); /// Candidates for deletion. @@ -312,6 +382,19 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src, return true; } +bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy( + const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { + Register Def = Copy.getOperand(0).getReg(); + + if (const TargetRegisterClass *URC = + UseI.getRegClassConstraint(UseIdx, TII, TRI)) + return URC->contains(Def); + + // We don't process further if UseI is a COPY, since forward copy propagation + // should handle that. + return false; +} + /// Decide whether we should forward the source of \param Copy to its use in /// \param UseI based on the physical register class constraints of the opcode /// and avoiding introducing more cross-class COPYs. @@ -432,6 +515,15 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { if (hasImplicitOverlap(MI, MOUse)) continue; + // Check that the instruction is not a copy that partially overwrites the + // original copy source that we are about to use. The tracker mechanism + // cannot cope with that. 
+ if (MI.isCopy() && MI.modifiesRegister(CopySrcReg, TRI) && + !MI.definesRegister(CopySrcReg)) { + LLVM_DEBUG(dbgs() << "MCP: Copy source overlap with dest in " << MI); + continue; + } + if (!DebugCounter::shouldExecute(FwdCounter)) { LLVM_DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n " << MI); @@ -458,8 +550,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) { } } -void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { - LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); +void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName() + << "\n"); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { MachineInstr *MI = &*I; @@ -637,6 +730,137 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { Tracker.clear(); } +static bool isBackwardPropagatableCopy(MachineInstr &MI, + const MachineRegisterInfo &MRI) { + assert(MI.isCopy() && "MI is expected to be a COPY"); + Register Def = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + + if (!Def || !Src) + return false; + + if (MRI.isReserved(Def) || MRI.isReserved(Src)) + return false; + + return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill(); +} + +void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { + if (!Tracker.hasAnyCopies()) + return; + + for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd; + ++OpIdx) { + MachineOperand &MODef = MI.getOperand(OpIdx); + + if (!MODef.isReg() || MODef.isUse()) + continue; + + // Ignore non-trivial cases. + if (MODef.isTied() || MODef.isUndef() || MODef.isImplicit()) + continue; + + if (!MODef.getReg()) + continue; + + // We only handle if the register comes from a vreg. + if (!MODef.isRenamable()) + continue; + + MachineInstr *Copy = + Tracker.findAvailBackwardCopy(MI, MODef.getReg(), *TRI); + if (!Copy) + continue; + + Register Def = Copy->getOperand(0).getReg(); + Register Src = Copy->getOperand(1).getReg(); + + if (MODef.getReg() != Src) + continue; + + if (!isBackwardPropagatableRegClassCopy(*Copy, MI, OpIdx)) + continue; + + if (hasImplicitOverlap(MI, MODef)) + continue; + + LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI) + << "\n with " << printReg(Def, TRI) << "\n in " + << MI << " from " << *Copy); + + MODef.setReg(Def); + MODef.setIsRenamable(Copy->getOperand(0).isRenamable()); + + LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); + MaybeDeadCopies.insert(Copy); + Changed = true; + ++NumCopyBackwardPropagated; + } +} + +void MachineCopyPropagation::BackwardCopyPropagateBlock( + MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName() + << "\n"); + + for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); + I != E;) { + MachineInstr *MI = &*I; + ++I; + + // Ignore non-trivial COPYs. + if (MI->isCopy() && MI->getNumOperands() == 2 && + !TRI->regsOverlap(MI->getOperand(0).getReg(), + MI->getOperand(1).getReg())) { + + Register Def = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); + + // Unlike forward cp, we don't invoke propagateDefs here, + // just let forward cp do COPY-to-COPY propagation. 
+ if (isBackwardPropagatableCopy(*MI, *MRI)) { + Tracker.invalidateRegister(Src, *TRI); + Tracker.invalidateRegister(Def, *TRI); + Tracker.trackCopy(MI, *TRI); + continue; + } + } + + // Invalidate any earlyclobber regs first. + for (const MachineOperand &MO : MI->operands()) + if (MO.isReg() && MO.isEarlyClobber()) { + Register Reg = MO.getReg(); + if (!Reg) + continue; + Tracker.invalidateRegister(Reg, *TRI); + } + + propagateDefs(*MI); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + + if (!MO.getReg()) + continue; + + if (MO.isDef()) + Tracker.invalidateRegister(MO.getReg(), *TRI); + + if (MO.readsReg()) + Tracker.invalidateRegister(MO.getReg(), *TRI); + } + } + + for (auto *Copy : MaybeDeadCopies) { + Copy->eraseFromParent(); + ++NumDeletes; + } + + MaybeDeadCopies.clear(); + CopyDbgUsers.clear(); + Tracker.clear(); +} + bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -647,8 +871,10 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); MRI = &MF.getRegInfo(); - for (MachineBasicBlock &MBB : MF) - CopyPropagateBlock(MBB); + for (MachineBasicBlock &MBB : MF) { + BackwardCopyPropagateBlock(MBB); + ForwardCopyPropagateBlock(MBB); + } return Changed; } diff --git a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp index 6704298c17d6..6ddb1758719b 100644 --- a/llvm/lib/CodeGen/MachineDominanceFrontier.cpp +++ b/llvm/lib/CodeGen/MachineDominanceFrontier.cpp @@ -10,6 +10,7 @@ #include "llvm/Analysis/DominanceFrontierImpl.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp index 706c706d7527..c8845d838282 100644 --- a/llvm/lib/CodeGen/MachineDominators.cpp +++ b/llvm/lib/CodeGen/MachineDominators.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -49,11 +50,15 @@ void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { } bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { + calculate(F); + return false; +} + +void MachineDominatorTree::calculate(MachineFunction &F) { CriticalEdgesToSplit.clear(); NewBBs.clear(); DT.reset(new DomTreeBase<MachineBasicBlock>()); DT->recalculate(F); - return false; } MachineDominatorTree::MachineDominatorTree() diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp index 604f5145b1a0..22ab2c7a6d77 100644 --- a/llvm/lib/CodeGen/MachineFrameInfo.cpp +++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp @@ -133,11 +133,11 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { return BV; } -unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { +uint64_t MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); unsigned MaxAlign = getMaxAlignment(); - int Offset = 0; + int64_t Offset = 0; // This code is very, very similar to PEI::calculateFrameObjectOffsets(). // It really should be refactored to share code. 
Until then, changes @@ -147,7 +147,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { // Only estimate stack size of default stack. if (getStackID(i) != TargetStackID::Default) continue; - int FixedOff = -getObjectOffset(i); + int64_t FixedOff = -getObjectOffset(i); if (FixedOff > Offset) Offset = FixedOff; } for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { @@ -183,7 +183,7 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { unsigned AlignMask = StackAlign - 1; Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - return (unsigned)Offset; + return (uint64_t)Offset; } void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) { diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 7d2ee230ca9f..4612690644fe 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -270,6 +270,21 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { return JumpTableInfo; } +DenormalMode MachineFunction::getDenormalMode(const fltSemantics &FPType) const { + // TODO: Should probably avoid the connection to the IR and store directly + // in the MachineFunction. + Attribute Attr = F.getFnAttribute("denormal-fp-math"); + + // FIXME: This should assume IEEE behavior on an unspecified + // attribute. However, the one current user incorrectly assumes a non-IEEE + // target by default. + StringRef Val = Attr.getValueAsString(); + if (Val.empty()) + return DenormalMode::Invalid; + + return parseDenormalFPAttribute(Val); +} + /// Should we be emitting segmented stack stuff for the function bool MachineFunction::shouldSplitStack() const { return getFunction().hasFnAttribute("split-stack"); @@ -447,12 +462,11 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MMO->getOrdering(), MMO->getFailureOrdering()); } -MachineInstr::ExtraInfo * -MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs, - MCSymbol *PreInstrSymbol, - MCSymbol *PostInstrSymbol) { +MachineInstr::ExtraInfo *MachineFunction::createMIExtraInfo( + ArrayRef<MachineMemOperand *> MMOs, MCSymbol *PreInstrSymbol, + MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker) { return MachineInstr::ExtraInfo::create(Allocator, MMOs, PreInstrSymbol, - PostInstrSymbol); + PostInstrSymbol, HeapAllocMarker); } const char *MachineFunction::createExternalSymbolName(StringRef Name) { @@ -470,6 +484,12 @@ uint32_t *MachineFunction::allocateRegMask() { return Mask; } +ArrayRef<int> MachineFunction::allocateShuffleMask(ArrayRef<int> Mask) { + int* AllocMask = Allocator.Allocate<int>(Mask.size()); + copy(Mask, AllocMask); + return {AllocMask, Mask.size()}; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void MachineFunction::dump() const { print(dbgs()); @@ -521,6 +541,13 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { OS << "\n# End machine code for function " << getName() << ".\n\n"; } +/// True if this function needs frame moves for debug or exceptions. 
+bool MachineFunction::needsFrameMoves() const { + return getMMI().hasDebugInfo() || + getTarget().Options.ForceDwarfFrameSection || + F.needsUnwindTableEntry(); +} + namespace llvm { template<> @@ -824,15 +851,13 @@ try_next:; return FilterID; } -void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, - const MDNode *MD) { - MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true); - MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true); - I->setPreInstrSymbol(*this, BeginLabel); - I->setPostInstrSymbol(*this, EndLabel); +MachineFunction::CallSiteInfoMap::iterator +MachineFunction::getCallSiteInfo(const MachineInstr *MI) { + assert(MI->isCall() && "Call site info refers only to call instructions!"); - const DIType *DI = dyn_cast<DIType>(MD); - CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI)); + if (!Target.Options.EnableDebugEntryValues) + return CallSitesInfo.end(); + return CallSitesInfo.find(MI); } void MachineFunction::moveCallSiteInfo(const MachineInstr *Old, diff --git a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp index 0ea8975cc74c..3645a4e3466b 100644 --- a/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index fec20b2b1a05..08d786f8f12c 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -187,8 +187,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps, MachineRegisterInfo *MRI) { if (MRI) return MRI->moveOperands(Dst, Src, NumOps); - // MachineOperand is a trivially copyable type so we can just use memmove. + assert(Dst && Src && "Unknown operands"); std::memmove(Dst, Src, NumOps * sizeof(MachineOperand)); } @@ -316,27 +316,48 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { --NumOperands; } -void MachineInstr::dropMemRefs(MachineFunction &MF) { - if (memoperands_empty()) - return; +void MachineInstr::setExtraInfo(MachineFunction &MF, + ArrayRef<MachineMemOperand *> MMOs, + MCSymbol *PreInstrSymbol, + MCSymbol *PostInstrSymbol, + MDNode *HeapAllocMarker) { + bool HasPreInstrSymbol = PreInstrSymbol != nullptr; + bool HasPostInstrSymbol = PostInstrSymbol != nullptr; + bool HasHeapAllocMarker = HeapAllocMarker != nullptr; + int NumPointers = + MMOs.size() + HasPreInstrSymbol + HasPostInstrSymbol + HasHeapAllocMarker; - // See if we can just drop all of our extra info. - if (!getPreInstrSymbol() && !getPostInstrSymbol()) { + // Drop all extra info if there is none. + if (NumPointers <= 0) { Info.clear(); return; } - if (!getPostInstrSymbol()) { - Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol()); + + // If more than one pointer, then store out of line. Store heap alloc markers + // out of line because PointerSumType cannot hold more than 4 tag types with + // 32-bit pointers. + // FIXME: Maybe we should make the symbols in the extra info mutable? 
+ else if (NumPointers > 1 || HasHeapAllocMarker) { + Info.set<EIIK_OutOfLine>(MF.createMIExtraInfo( + MMOs, PreInstrSymbol, PostInstrSymbol, HeapAllocMarker)); return; } - if (!getPreInstrSymbol()) { - Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol()); + + // Otherwise store the single pointer inline. + if (HasPreInstrSymbol) + Info.set<EIIK_PreInstrSymbol>(PreInstrSymbol); + else if (HasPostInstrSymbol) + Info.set<EIIK_PostInstrSymbol>(PostInstrSymbol); + else + Info.set<EIIK_MMO>(MMOs[0]); +} + +void MachineInstr::dropMemRefs(MachineFunction &MF) { + if (memoperands_empty()) return; - } - // Otherwise allocate a fresh extra info with just these symbols. - Info.set<EIIK_OutOfLine>( - MF.createMIExtraInfo({}, getPreInstrSymbol(), getPostInstrSymbol())); + setExtraInfo(MF, {}, getPreInstrSymbol(), getPostInstrSymbol(), + getHeapAllocMarker()); } void MachineInstr::setMemRefs(MachineFunction &MF, @@ -346,15 +367,8 @@ void MachineInstr::setMemRefs(MachineFunction &MF, return; } - // Try to store a single MMO inline. - if (MMOs.size() == 1 && !getPreInstrSymbol() && !getPostInstrSymbol()) { - Info.set<EIIK_MMO>(MMOs[0]); - return; - } - - // Otherwise create an extra info struct with all of our info. - Info.set<EIIK_OutOfLine>( - MF.createMIExtraInfo(MMOs, getPreInstrSymbol(), getPostInstrSymbol())); + setExtraInfo(MF, MMOs, getPreInstrSymbol(), getPostInstrSymbol(), + getHeapAllocMarker()); } void MachineInstr::addMemOperand(MachineFunction &MF, @@ -376,7 +390,8 @@ void MachineInstr::cloneMemRefs(MachineFunction &MF, const MachineInstr &MI) { // instruction. We can do this whenever the pre- and post-instruction symbols // are the same (including null). if (getPreInstrSymbol() == MI.getPreInstrSymbol() && - getPostInstrSymbol() == MI.getPostInstrSymbol()) { + getPostInstrSymbol() == MI.getPostInstrSymbol() && + getHeapAllocMarker() == MI.getHeapAllocMarker()) { Info = MI.Info; return; } @@ -450,67 +465,42 @@ void MachineInstr::cloneMergedMemRefs(MachineFunction &MF, } void MachineInstr::setPreInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { - MCSymbol *OldSymbol = getPreInstrSymbol(); - if (OldSymbol == Symbol) + // Do nothing if old and new symbols are the same. + if (Symbol == getPreInstrSymbol()) return; - if (OldSymbol && !Symbol) { - // We're removing a symbol rather than adding one. Try to clean up any - // extra info carried around. - if (Info.is<EIIK_PreInstrSymbol>()) { - Info.clear(); - return; - } - if (memoperands_empty()) { - assert(getPostInstrSymbol() && - "Should never have only a single symbol allocated out-of-line!"); - Info.set<EIIK_PostInstrSymbol>(getPostInstrSymbol()); - return; - } - - // Otherwise fallback on the generic update. - } else if (!Info || Info.is<EIIK_PreInstrSymbol>()) { - // If we don't have any other extra info, we can store this inline. - Info.set<EIIK_PreInstrSymbol>(Symbol); + // If there was only one symbol and we're removing it, just clear info. + if (!Symbol && Info.is<EIIK_PreInstrSymbol>()) { + Info.clear(); return; } - // Otherwise, allocate a full new set of extra info. - // FIXME: Maybe we should make the symbols in the extra info mutable? - Info.set<EIIK_OutOfLine>( - MF.createMIExtraInfo(memoperands(), Symbol, getPostInstrSymbol())); + setExtraInfo(MF, memoperands(), Symbol, getPostInstrSymbol(), + getHeapAllocMarker()); } void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) { - MCSymbol *OldSymbol = getPostInstrSymbol(); - if (OldSymbol == Symbol) + // Do nothing if old and new symbols are the same. 
+ if (Symbol == getPostInstrSymbol()) return; - if (OldSymbol && !Symbol) { - // We're removing a symbol rather than adding one. Try to clean up any - // extra info carried around. - if (Info.is<EIIK_PostInstrSymbol>()) { - Info.clear(); - return; - } - if (memoperands_empty()) { - assert(getPreInstrSymbol() && - "Should never have only a single symbol allocated out-of-line!"); - Info.set<EIIK_PreInstrSymbol>(getPreInstrSymbol()); - return; - } - - // Otherwise fallback on the generic update. - } else if (!Info || Info.is<EIIK_PostInstrSymbol>()) { - // If we don't have any other extra info, we can store this inline. - Info.set<EIIK_PostInstrSymbol>(Symbol); + // If there was only one symbol and we're removing it, just clear info. + if (!Symbol && Info.is<EIIK_PostInstrSymbol>()) { + Info.clear(); return; } - // Otherwise, allocate a full new set of extra info. - // FIXME: Maybe we should make the symbols in the extra info mutable? - Info.set<EIIK_OutOfLine>( - MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol)); + setExtraInfo(MF, memoperands(), getPreInstrSymbol(), Symbol, + getHeapAllocMarker()); +} + +void MachineInstr::setHeapAllocMarker(MachineFunction &MF, MDNode *Marker) { + // Do nothing if old and new symbols are the same. + if (Marker == getHeapAllocMarker()) + return; + + setExtraInfo(MF, memoperands(), getPreInstrSymbol(), getPostInstrSymbol(), + Marker); } void MachineInstr::cloneInstrSymbols(MachineFunction &MF, @@ -524,6 +514,7 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF, setPreInstrSymbol(MF, MI.getPreInstrSymbol()); setPostInstrSymbol(MF, MI.getPostInstrSymbol()); + setHeapAllocMarker(MF, MI.getHeapAllocMarker()); } uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const { @@ -1515,7 +1506,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, LLT TypeToPrint = MRI ? getTypeToPrint(StartOp, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(StartOp); - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/false, IsStandalone, + MO.print(OS, MST, TypeToPrint, StartOp, /*PrintDef=*/false, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); ++StartOp; } @@ -1547,8 +1538,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nsw "; if (getFlag(MachineInstr::IsExact)) OS << "exact "; - if (getFlag(MachineInstr::FPExcept)) - OS << "fpexcept "; + if (getFlag(MachineInstr::NoFPExcept)) + OS << "nofpexcept "; // Print the opcode name. if (TII) @@ -1570,7 +1561,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, const unsigned OpIdx = InlineAsm::MIOp_AsmString; LLT TypeToPrint = MRI ? getTypeToPrint(OpIdx, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(OpIdx); - getOperand(OpIdx).print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + getOperand(OpIdx).print(OS, MST, TypeToPrint, OpIdx, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); @@ -1609,7 +1600,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, else { LLT TypeToPrint = MRI ? 
getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(i); - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } } else if (isDebugLabel() && MO.isMetadata()) { @@ -1620,7 +1611,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, else { LLT TypeToPrint = MRI ? getTypeToPrint(i, PrintedTypes, *MRI) : LLT{}; unsigned TiedOperandIdx = getTiedOperandIdx(i); - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } } else if (i == AsmDescOp && MO.isImm()) { @@ -1687,7 +1678,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (MO.isImm() && isOperandSubregIdx(i)) MachineOperand::printSubRegIdx(OS, MO.getImm(), TRI); else - MO.print(OS, MST, TypeToPrint, /*PrintDef=*/true, IsStandalone, + MO.print(OS, MST, TypeToPrint, i, /*PrintDef=*/true, IsStandalone, ShouldPrintRegisterTies, TiedOperandIdx, TRI, IntrinsicInfo); } } @@ -1710,6 +1701,14 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << " post-instr-symbol "; MachineOperand::printSymbol(OS, *PostInstrSymbol); } + if (MDNode *HeapAllocMarker = getHeapAllocMarker()) { + if (!FirstOp) { + FirstOp = false; + OS << ','; + } + OS << " heap-alloc-marker "; + HeapAllocMarker->printAsOperand(OS, MST); + } if (!SkipDebugLoc) { if (const DebugLoc &DL = getDebugLoc()) { @@ -1978,7 +1977,7 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs, unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { // Build up a buffer of hash code components. - SmallVector<size_t, 8> HashComponents; + SmallVector<size_t, 16> HashComponents; HashComponents.reserve(MI->getNumOperands() + 1); HashComponents.push_back(MI->getOpcode()); for (const MachineOperand &MO : MI->operands()) { diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp index feb849ced353..94865b0e9031 100644 --- a/llvm/lib/CodeGen/MachineInstrBundle.cpp +++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Target/TargetMachine.h" #include <utility> using namespace llvm; @@ -277,22 +278,18 @@ bool llvm::finalizeBundles(MachineFunction &MF) { return Changed; } -//===----------------------------------------------------------------------===// -// MachineOperand iterator -//===----------------------------------------------------------------------===// - -MachineOperandIteratorBase::VirtRegInfo -MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, - SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) { - VirtRegInfo RI = { false, false, false }; - for(; isValid(); ++*this) { - MachineOperand &MO = deref(); +VirtRegInfo llvm::AnalyzeVirtRegInBundle( + MachineInstr &MI, unsigned Reg, + SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops) { + VirtRegInfo RI = {false, false, false}; + for (MIBundleOperands O(MI); O.isValid(); ++O) { + MachineOperand &MO = *O; if (!MO.isReg() || MO.getReg() != Reg) continue; // Remember each (MI, OpNo) that refers to Reg. 
if (Ops) - Ops->push_back(std::make_pair(MO.getParent(), getOperandNo())); + Ops->push_back(std::make_pair(MO.getParent(), O.getOperandNo())); // Both defs and uses can read virtual registers. if (MO.readsReg()) { @@ -304,22 +301,22 @@ MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, // Only defs can write. if (MO.isDef()) RI.Writes = true; - else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo())) + else if (!RI.Tied && + MO.getParent()->isRegTiedToDefOperand(O.getOperandNo())) RI.Tied = true; } return RI; } -MachineOperandIteratorBase::PhysRegInfo -MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, - const TargetRegisterInfo *TRI) { +PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, unsigned Reg, + const TargetRegisterInfo *TRI) { bool AllDefsDead = true; PhysRegInfo PRI = {false, false, false, false, false, false, false, false}; assert(Register::isPhysicalRegister(Reg) && "analyzePhysReg not given a physical register!"); - for (; isValid(); ++*this) { - MachineOperand &MO = deref(); + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + const MachineOperand &MO = *O; if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) { PRI.Clobbered = true; diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 6a898ff6ef88..462d4d3b3726 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -39,6 +40,7 @@ #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -74,6 +76,27 @@ static cl::opt<bool> HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden); +// The default threshold of 100 (i.e. if target block is 100 times hotter) +// is based on empirical data on a single target and is subject to tuning. +static cl::opt<unsigned> +BlockFrequencyRatioThreshold("block-freq-ratio-threshold", + cl::desc("Do not hoist instructions if target " + "block is N times hotter than the source."), + cl::init(100), cl::Hidden); + +enum class UseBFI { None, PGO, All }; + +static cl::opt<UseBFI> +DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks", + cl::desc("Disable hoisting instructions to" + " hotter blocks"), + cl::init(UseBFI::None), cl::Hidden, + cl::values(clEnumValN(UseBFI::None, "none", + "disable the feature"), + clEnumValN(UseBFI::PGO, "pgo", + "enable the feature when using profile data"), + clEnumValN(UseBFI::All, "all", + "enable the feature with/wo profile data"))); STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); @@ -87,6 +110,8 @@ STATISTIC(NumPostRAHoisted, "Number of machine instructions hoisted out of loops post regalloc"); STATISTIC(NumStoreConst, "Number of stores of const phys reg hoisted out of loops"); +STATISTIC(NumNotHoistedDueToHotness, + "Number of instructions not hoisted due to block frequency"); namespace { @@ -98,9 +123,11 @@ namespace { MachineRegisterInfo *MRI; TargetSchedModel SchedModel; bool PreRegAlloc; + bool HasProfileData; // Various analyses that we use... 
AliasAnalysis *AA; // Alias analysis info. + MachineBlockFrequencyInfo *MBFI; // Machine block frequency info MachineLoopInfo *MLI; // Current MachineLoopInfo MachineDominatorTree *DT; // Machine dominator tree for the cur loop @@ -150,6 +177,8 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineLoopInfo>(); + if (DisableHoistingToHotterBlocks != UseBFI::None) + AU.addRequired<MachineBlockFrequencyInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<AAResultsWrapperPass>(); AU.addPreserved<MachineLoopInfo>(); @@ -245,6 +274,8 @@ namespace { void InitCSEMap(MachineBasicBlock *BB); + bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock, + MachineBasicBlock *TgtBlock); MachineBasicBlock *getCurPreheader(); }; @@ -275,6 +306,7 @@ char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID; INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, @@ -283,6 +315,7 @@ INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm", "Early Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm", @@ -315,6 +348,7 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { SchedModel.init(&ST); PreRegAlloc = MRI->isSSA(); + HasProfileData = MF.getFunction().hasProfileData(); if (PreRegAlloc) LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); @@ -333,6 +367,8 @@ bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) { } // Get our Loop information... + if (DisableHoistingToHotterBlocks != UseBFI::None) + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); MLI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); @@ -1433,6 +1469,15 @@ bool MachineLICMBase::MayCSE(MachineInstr *MI) { /// that are safe to hoist, this instruction is called to do the dirty work. /// It returns true if the instruction is hoisted. bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { + MachineBasicBlock *SrcBlock = MI->getParent(); + + // Disable the instruction hoisting due to block hotness + if ((DisableHoistingToHotterBlocks == UseBFI::All || + (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) && + isTgtHotterThanSrc(SrcBlock, Preheader)) { + ++NumNotHoistedDueToHotness; + return false; + } // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { // If not, try unfolding a hoistable load. @@ -1526,3 +1571,21 @@ MachineBasicBlock *MachineLICMBase::getCurPreheader() { } return CurPreheader; } + +/// Is the target basic block at least "BlockFrequencyRatioThreshold" +/// times hotter than the source basic block. 
+bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock, + MachineBasicBlock *TgtBlock) { + // Parse source and target basic block frequency from MBFI + uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency(); + uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency(); + + // Disable the hoisting if source block frequency is zero + if (!SrcBF) + return true; + + double Ratio = (double)DstBF / SrcBF; + + // Compare the block frequency ratio with the threshold + return Ratio > BlockFrequencyRatioThreshold; +} diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp index 3b8b430d1b0f..0c1439da9b29 100644 --- a/llvm/lib/CodeGen/MachineLoopInfo.cpp +++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -27,6 +28,9 @@ template class llvm::LoopBase<MachineBasicBlock, MachineLoop>; template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>; char MachineLoopInfo::ID = 0; +MachineLoopInfo::MachineLoopInfo() : MachineFunctionPass(ID) { + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); +} INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops", "Machine Natural Loop Construction", true, true) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) @@ -36,11 +40,15 @@ INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops", char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { - releaseMemory(); - LI.analyze(getAnalysis<MachineDominatorTree>().getBase()); + calculate(getAnalysis<MachineDominatorTree>()); return false; } +void MachineLoopInfo::calculate(MachineDominatorTree &MDT) { + releaseMemory(); + LI.analyze(MDT.getBase()); +} + void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<MachineDominatorTree>(); diff --git a/llvm/lib/CodeGen/MachineLoopUtils.cpp b/llvm/lib/CodeGen/MachineLoopUtils.cpp index e074b76082f0..cf30e28449cd 100644 --- a/llvm/lib/CodeGen/MachineLoopUtils.cpp +++ b/llvm/lib/CodeGen/MachineLoopUtils.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -130,3 +131,14 @@ MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction, return NewBB; } + +bool llvm::isRegLiveInExitBlocks(MachineLoop *Loop, int PhysReg) { + SmallVector<MachineBasicBlock *, 4> ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + + for (auto *MBB : ExitBlocks) + if (MBB->isLiveIn(PhysReg)) + return true; + + return false; +} diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index e0b4e9cac229..0094a923e039 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -20,8 +20,10 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" @@ -116,7 +118,17 @@ ArrayRef<MCSymbol *> 
MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { BBCallbacks.back().setMap(this); Entry.Index = BBCallbacks.size() - 1; Entry.Fn = BB->getParent(); - Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken())); + MCSymbol *Sym = Context.createTempSymbol(!BB->hasAddressTaken()); + if (Context.getObjectFileInfo()->getTargetTriple().isOSBinFormatXCOFF()) { + MCSymbol *FnEntryPointSym = + Context.lookupSymbol("." + Entry.Fn->getName()); + assert(FnEntryPointSym && "The function entry pointer symbol should have" + " already been initialized."); + MCSectionXCOFF *Csect = + cast<MCSymbolXCOFF>(FnEntryPointSym)->getContainingCsect(); + cast<MCSymbolXCOFF>(Sym)->setContainingCsect(Csect); + } + Entry.Symbols.push_back(Sym); return Entry.Symbols; } diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp index 8b19501ec3cf..7b8f01100929 100644 --- a/llvm/lib/CodeGen/MachineOperand.cpp +++ b/llvm/lib/CodeGen/MachineOperand.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/MIRFormatter.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -458,28 +459,6 @@ static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB, OS << "<unknown>"; } -static void printIRValueReference(raw_ostream &OS, const Value &V, - ModuleSlotTracker &MST) { - if (isa<GlobalValue>(V)) { - V.printAsOperand(OS, /*PrintType=*/false, MST); - return; - } - if (isa<Constant>(V)) { - // Machine memory operands can load/store to/from constant value pointers. - OS << '`'; - V.printAsOperand(OS, /*PrintType=*/true, MST); - OS << '`'; - return; - } - OS << "%ir."; - if (V.hasName()) { - printLLVMNameWithoutPrefix(OS, V.getName()); - return; - } - int Slot = MST.getCurrentFunction() ? 
MST.getLocalSlot(&V) : -1; - MachineOperand::printIRSlotNumber(OS, Slot); -} - static void printSyncScope(raw_ostream &OS, const LLVMContext &Context, SyncScope::ID SSID, SmallVectorImpl<StringRef> &SSNs) { @@ -734,14 +713,15 @@ void MachineOperand::print(raw_ostream &OS, LLT TypeToPrint, const TargetIntrinsicInfo *IntrinsicInfo) const { tryToGetTargetInfo(*this, TRI, IntrinsicInfo); ModuleSlotTracker DummyMST(nullptr); - print(OS, DummyMST, TypeToPrint, /*PrintDef=*/false, /*IsStandalone=*/true, + print(OS, DummyMST, TypeToPrint, None, /*PrintDef=*/false, + /*IsStandalone=*/true, /*ShouldPrintRegisterTies=*/true, /*TiedOperandIdx=*/0, TRI, IntrinsicInfo); } void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, - LLT TypeToPrint, bool PrintDef, bool IsStandalone, - bool ShouldPrintRegisterTies, + LLT TypeToPrint, Optional<unsigned> OpIdx, bool PrintDef, + bool IsStandalone, bool ShouldPrintRegisterTies, unsigned TiedOperandIdx, const TargetRegisterInfo *TRI, const TargetIntrinsicInfo *IntrinsicInfo) const { @@ -802,9 +782,19 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << '(' << TypeToPrint << ')'; break; } - case MachineOperand::MO_Immediate: - OS << getImm(); + case MachineOperand::MO_Immediate: { + const MIRFormatter *Formatter = nullptr; + if (const MachineFunction *MF = getMFIfAvailable(*this)) { + const auto *TII = MF->getSubtarget().getInstrInfo(); + assert(TII && "expected instruction info"); + Formatter = TII->getMIRFormatter(); + } + if (Formatter) + Formatter->printImm(OS, *getParent(), OpIdx, getImm()); + else + OS << getImm(); break; + } case MachineOperand::MO_CImmediate: getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); break; @@ -940,13 +930,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, } case MachineOperand::MO_ShuffleMask: OS << "shufflemask("; - const Constant* C = getShuffleMask(); - const int NumElts = C->getType()->getVectorNumElements(); - + ArrayRef<int> Mask = getShuffleMask(); StringRef Separator; - for (int I = 0; I != NumElts; ++I) { - OS << Separator; - C->getAggregateElement(I)->printAsOperand(OS, false, MST); + for (int Elt : Mask) { + if (Elt == -1) + OS << Separator << "undef"; + else + OS << Separator << Elt; Separator = ", "; } @@ -1111,7 +1101,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, if (const Value *Val = getValue()) { OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); - printIRValueReference(OS, *Val, MST); + MIRFormatter::printIRValue(OS, *Val, MST); } else if (const PseudoSourceValue *PVal = getPseudoValue()) { OS << ((isLoad() && isStore()) ? " on " : isLoad() ? " from " : " into "); assert(PVal && "Expected a pseudo source value"); @@ -1144,15 +1134,18 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, printLLVMNameWithoutPrefix( OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); break; - default: + default: { + const MIRFormatter *Formatter = TII->getMIRFormatter(); // FIXME: This is not necessarily the correct MIR serialization format for // a custom pseudo source value, but at least it allows // -print-machineinstrs to work on a target with custom pseudo source // values. 
- OS << "custom "; - PVal->printCustom(OS); + OS << "custom \""; + Formatter->printCustomPseudoSourceValue(OS, MST, *PVal); + OS << '\"'; break; } + } } MachineOperand::printOperandOffset(OS, getOffset()); if (getBaseAlignment() != getSize()) diff --git a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index b82403ae1b85..d656953f9115 100644 --- a/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/llvm/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index 8cd66825a58a..3a9104bda0d1 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -68,6 +68,7 @@ #include "llvm/IR/DIBuilder.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Mangler.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -91,8 +92,7 @@ STATISTIC(FunctionsCreated, "Number of functions created"); // this is off by default. It should, however, be the default behaviour in // LTO. static cl::opt<bool> EnableLinkOnceODROutlining( - "enable-linkonceodr-outlining", - cl::Hidden, + "enable-linkonceodr-outlining", cl::Hidden, cl::desc("Enable the machine outliner on linkonceodr functions"), cl::init(false)); @@ -253,7 +253,7 @@ private: /// Ukkonen's algorithm. struct ActiveState { /// The next node to insert at. - SuffixTreeNode *Node; + SuffixTreeNode *Node = nullptr; /// The index of the first character in the substring currently being added. unsigned Idx = EmptyIdx; @@ -301,8 +301,8 @@ private: "Non-root internal nodes must have parents!"); unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx); - SuffixTreeNode *N = new (NodeAllocator.Allocate()) - SuffixTreeNode(StartIdx, E, Root); + SuffixTreeNode *N = + new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root); if (Parent) Parent->Children[Edge] = N; @@ -311,26 +311,31 @@ private: /// Set the suffix indices of the leaves to the start indices of their /// respective suffixes. - /// - /// \param[in] CurrNode The node currently being visited. - /// \param CurrNodeLen The concatenation of all node sizes from the root to - /// this node. Used to produce suffix indices. - void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrNodeLen) { - - bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot(); - - // Store the concatenation of lengths down from the root. - CurrNode.ConcatLen = CurrNodeLen; - // Traverse the tree depth-first. - for (auto &ChildPair : CurrNode.Children) { - assert(ChildPair.second && "Node had a null child!"); - setSuffixIndices(*ChildPair.second, - CurrNodeLen + ChildPair.second->size()); - } + void setSuffixIndices() { + // List of nodes we need to visit along with the current length of the + // string. + std::vector<std::pair<SuffixTreeNode *, unsigned>> ToVisit; + + // Current node being visited. + SuffixTreeNode *CurrNode = Root; + + // Sum of the lengths of the nodes down the path to the current one. 
+ unsigned CurrNodeLen = 0; + ToVisit.push_back({CurrNode, CurrNodeLen}); + while (!ToVisit.empty()) { + std::tie(CurrNode, CurrNodeLen) = ToVisit.back(); + ToVisit.pop_back(); + CurrNode->ConcatLen = CurrNodeLen; + for (auto &ChildPair : CurrNode->Children) { + assert(ChildPair.second && "Node had a null child!"); + ToVisit.push_back( + {ChildPair.second, CurrNodeLen + ChildPair.second->size()}); + } - // Is this node a leaf? If it is, give it a suffix index. - if (IsLeaf) - CurrNode.SuffixIdx = Str.size() - CurrNodeLen; + // No children, so we are at the end of the string. + if (CurrNode->Children.size() == 0 && !CurrNode->isRoot()) + CurrNode->SuffixIdx = Str.size() - CurrNodeLen; + } } /// Construct the suffix tree for the prefix of the input ending at @@ -473,7 +478,6 @@ public: // Keep track of the number of suffixes we have to add of the current // prefix. unsigned SuffixesToAdd = 0; - Active.Node = Root; // Construct the suffix tree iteratively on each prefix of the string. // PfxEndIdx is the end index of the current prefix. @@ -487,13 +491,12 @@ public: // Set the suffix indices of each leaf. assert(Root && "Root node can't be nullptr!"); - setSuffixIndices(*Root, 0); + setSuffixIndices(); } - /// Iterator for finding all repeated substrings in the suffix tree. struct RepeatedSubstringIterator { - private: + private: /// The current node we're visiting. SuffixTreeNode *N = nullptr; @@ -595,7 +598,7 @@ public: advance(); } } -}; + }; typedef RepeatedSubstringIterator iterator; iterator begin() { return iterator(Root); } @@ -694,9 +697,10 @@ struct InstructionMapper { /// IllegalInstrNumber. /// /// \returns The integer that \p *It was mapped to. - unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It, - bool &CanOutlineWithPrevInstr, std::vector<unsigned> &UnsignedVecForMBB, - std::vector<MachineBasicBlock::iterator> &InstrListForMBB) { + unsigned mapToIllegalUnsigned( + MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr, + std::vector<unsigned> &UnsignedVecForMBB, + std::vector<MachineBasicBlock::iterator> &InstrListForMBB) { // Can't outline an illegal instruction. Set the flag. CanOutlineWithPrevInstr = false; @@ -764,12 +768,12 @@ struct InstructionMapper { std::vector<unsigned> UnsignedVecForMBB; std::vector<MachineBasicBlock::iterator> InstrListForMBB; - for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; It++) { + for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; ++It) { // Keep track of where this instruction is in the module. switch (TII.getOutliningType(It, Flags)) { case InstrType::Illegal: - mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, - UnsignedVecForMBB, InstrListForMBB); + mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, + InstrListForMBB); break; case InstrType::Legal: @@ -783,7 +787,7 @@ struct InstructionMapper { // The instruction also acts as a terminator, so we have to record that // in the string. mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, - InstrListForMBB); + InstrListForMBB); break; case InstrType::Invisible: @@ -802,7 +806,7 @@ struct InstructionMapper { // boundaries since the "end" is encoded uniquely and thus appears in no // repeated substring. 
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB, - InstrListForMBB); + InstrListForMBB); InstrList.insert(InstrList.end(), InstrListForMBB.begin(), InstrListForMBB.end()); UnsignedVec.insert(UnsignedVec.end(), UnsignedVecForMBB.begin(), @@ -888,24 +892,27 @@ struct MachineOutliner : public ModulePass { /// \param FunctionList A list of functions to be inserted into the module. /// \param Mapper Contains the instruction mappings for the module. bool outline(Module &M, std::vector<OutlinedFunction> &FunctionList, - InstructionMapper &Mapper); + InstructionMapper &Mapper, unsigned &OutlinedFunctionNum); /// Creates a function for \p OF and inserts it into the module. MachineFunction *createOutlinedFunction(Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name); + /// Calls 'doOutline()'. + bool runOnModule(Module &M) override; + /// Construct a suffix tree on the instructions in \p M and outline repeated /// strings from that tree. - bool runOnModule(Module &M) override; + bool doOutline(Module &M, unsigned &OutlinedFunctionNum); /// Return a DISubprogram for OF if one exists, and null otherwise. Helper /// function for remark emission. DISubprogram *getSubprogramOrNull(const OutlinedFunction &OF) { - DISubprogram *SP; for (const Candidate &C : OF.Candidates) - if (C.getMF() && (SP = C.getMF()->getFunction().getSubprogram())) - return SP; + if (MachineFunction *MF = C.getMF()) + if (DISubprogram *SP = MF->getFunction().getSubprogram()) + return SP; return nullptr; } @@ -918,15 +925,14 @@ struct MachineOutliner : public ModulePass { /// FIXME: This should be handled by the pass manager, not the outliner. /// FIXME: This is nearly identical to the initSizeRemarkInfo in the legacy /// pass manager. - void initSizeRemarkInfo( - const Module &M, const MachineModuleInfo &MMI, - StringMap<unsigned> &FunctionToInstrCount); + void initSizeRemarkInfo(const Module &M, const MachineModuleInfo &MMI, + StringMap<unsigned> &FunctionToInstrCount); /// Emit the remark. // FIXME: This should be handled by the pass manager, not the outliner. - void emitInstrCountChangedRemark( - const Module &M, const MachineModuleInfo &MMI, - const StringMap<unsigned> &FunctionToInstrCount); + void + emitInstrCountChangedRemark(const Module &M, const MachineModuleInfo &MMI, + const StringMap<unsigned> &FunctionToInstrCount); }; } // Anonymous namespace. @@ -1003,13 +1009,12 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) { MORE.emit(R); } -void -MachineOutliner::findCandidates(InstructionMapper &Mapper, - std::vector<OutlinedFunction> &FunctionList) { +void MachineOutliner::findCandidates( + InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) { FunctionList.clear(); SuffixTree ST(Mapper.UnsignedVec); - // First, find dall of the repeated substrings in the tree of minimum length + // First, find all of the repeated substrings in the tree of minimum length // 2. std::vector<Candidate> CandidatesForRepeatedSeq; for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) { @@ -1087,10 +1092,8 @@ MachineOutliner::findCandidates(InstructionMapper &Mapper, } } -MachineFunction * -MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF, - InstructionMapper &Mapper, - unsigned Name) { +MachineFunction *MachineOutliner::createOutlinedFunction( + Module &M, OutlinedFunction &OF, InstructionMapper &Mapper, unsigned Name) { // Create the function name. This should be unique. // FIXME: We should have a better naming scheme. 
This should be stable, @@ -1190,13 +1193,11 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF, bool MachineOutliner::outline(Module &M, std::vector<OutlinedFunction> &FunctionList, - InstructionMapper &Mapper) { + InstructionMapper &Mapper, + unsigned &OutlinedFunctionNum) { bool OutlinedSomething = false; - // Number to append to the current outlined function. - unsigned OutlinedFunctionNum = 0; - // Sort by benefit. The most beneficial functions should be outlined first. llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) { @@ -1303,12 +1304,6 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M, if (F.empty()) continue; - // Disable outlining from noreturn functions right now. Noreturn requires - // special handling for the case where what we are outlining could be a - // tail call. - if (F.hasFnAttribute(Attribute::NoReturn)) - continue; - // There's something in F. Check if it has a MachineFunction associated with // it. MachineFunction *MF = MMI.getMachineFunction(F); @@ -1403,8 +1398,7 @@ void MachineOutliner::emitInstrCountChangedRemark( MachineOptimizationRemarkEmitter MORE(*MF, nullptr); MORE.emit([&]() { MachineOptimizationRemarkAnalysis R("size-info", "FunctionMISizeChange", - DiagnosticLocation(), - &MF->front()); + DiagnosticLocation(), &MF->front()); R << DiagnosticInfoOptimizationBase::Argument("Pass", "Machine Outliner") << ": Function: " << DiagnosticInfoOptimizationBase::Argument("Function", F.getName()) @@ -1427,6 +1421,15 @@ bool MachineOutliner::runOnModule(Module &M) { if (M.empty()) return false; + // Number to append to the current outlined function. + unsigned OutlinedFunctionNum = 0; + + if (!doOutline(M, OutlinedFunctionNum)) + return false; + return true; +} + +bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) { MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); // If the user passed -enable-machine-outliner=always or @@ -1434,14 +1437,14 @@ bool MachineOutliner::runOnModule(Module &M) { // Otherwise, if the target supports default outlining, it will run on all // functions deemed by the target to be worth outlining from by default. Tell // the user how the outliner is running. - LLVM_DEBUG( + LLVM_DEBUG({ dbgs() << "Machine Outliner: Running on "; if (RunOnAllFunctions) dbgs() << "all functions"; else dbgs() << "target-default functions"; - dbgs() << "\n" - ); + dbgs() << "\n"; + }); // If the user specifies that they want to outline from linkonceodrs, set // it here. @@ -1470,7 +1473,8 @@ bool MachineOutliner::runOnModule(Module &M) { initSizeRemarkInfo(M, MMI, FunctionToInstrCount); // Outline each of the candidates and return true if something was outlined. - bool OutlinedSomething = outline(M, FunctionList, Mapper); + bool OutlinedSomething = + outline(M, FunctionList, Mapper, OutlinedFunctionNum); // If we outlined something, we definitely changed the MI count of the // module. If we've asked for size remarks, then output them. diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index 89c9f6093a97..ef22caa877c9 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1314,8 +1314,9 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) { // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this // SUnit to the container. 
SmallVector<SUnit *, 8> UseSUs; - for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) { - for (auto &Dep : (*I)->Succs) { + // Do not use iterator based loop here as we are updating the container. + for (size_t Index = 0; Index < PHISUs.size(); ++Index) { + for (auto &Dep : PHISUs[Index]->Succs) { if (Dep.getKind() != SDep::Data) continue; diff --git a/llvm/lib/CodeGen/MachinePostDominators.cpp b/llvm/lib/CodeGen/MachinePostDominators.cpp index f4daff667e86..fb96d0efa4d4 100644 --- a/llvm/lib/CodeGen/MachinePostDominators.cpp +++ b/llvm/lib/CodeGen/MachinePostDominators.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/MachineRegionInfo.cpp b/llvm/lib/CodeGen/MachineRegionInfo.cpp index 2961d456be0d..45cdcbfeab9f 100644 --- a/llvm/lib/CodeGen/MachineRegionInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegionInfo.cpp @@ -11,6 +11,7 @@ #include "llvm/Analysis/RegionInfoImpl.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index f0721ea3b76d..e42701b9c6ca 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -48,6 +48,7 @@ #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -238,6 +239,7 @@ void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<TargetPassConfig>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -402,7 +404,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { if (EnablePostRAMachineSched.getNumOccurrences()) { if (!EnablePostRAMachineSched) return false; - } else if (!mf.getSubtarget().enablePostRAScheduler()) { + } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) { LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); return false; } @@ -412,6 +414,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { MF = &mf; MLI = &getAnalysis<MachineLoopInfo>(); PassConfig = &getAnalysis<TargetPassConfig>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); if (VerifyScheduling) MF->verify(this, "Before post machine scheduling."); @@ -1495,7 +1498,7 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation { : BaseOp->getIndex() < RHS.BaseOp->getIndex(); if (Offset != RHS.Offset) - return StackGrowsDown ? 
Offset > RHS.Offset : Offset < RHS.Offset; + return Offset < RHS.Offset; return SU->NodeNum < RHS.SU->NodeNum; } @@ -1570,6 +1573,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) { SUnit *SUa = MemOpRecords[Idx].SU; SUnit *SUb = MemOpRecords[Idx+1].SU; + if (SUa->NodeNum > SUb->NodeNum) + std::swap(SUa, SUb); if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp, *MemOpRecords[Idx + 1].BaseOp, ClusterLength) && @@ -1595,10 +1600,8 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps( /// Callback from DAG postProcessing to create cluster edges for loads. void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { - // Map DAG NodeNum to store chain ID. - DenseMap<unsigned, unsigned> StoreChainIDs; - // Map each store chain to a set of dependent MemOps. - SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; + // Map DAG NodeNum to a set of dependent MemOps in store chain. + DenseMap<unsigned, SmallVector<SUnit *, 4>> StoreChains; for (SUnit &SU : DAG->SUnits) { if ((IsLoad && !SU.getInstr()->mayLoad()) || (!IsLoad && !SU.getInstr()->mayStore())) @@ -1611,19 +1614,14 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) { break; } } - // Check if this chain-like pred has been seen - // before. ChainPredID==MaxNodeID at the top of the schedule. - unsigned NumChains = StoreChainDependents.size(); - std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result = - StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); - if (Result.second) - StoreChainDependents.resize(NumChains + 1); - StoreChainDependents[Result.first->second].push_back(&SU); + // Insert the SU to corresponding store chain. + auto &Chain = StoreChains.FindAndConstruct(ChainPredID).second; + Chain.push_back(&SU); } // Iterate over the store chains. - for (auto &SCD : StoreChainDependents) - clusterNeighboringMemOps(SCD, DAG); + for (auto &SCD : StoreChains) + clusterNeighboringMemOps(SCD.second, DAG); } //===----------------------------------------------------------------------===// @@ -2085,7 +2083,8 @@ getOtherResourceCount(unsigned &OtherCritIdx) { return OtherCritCount; } -void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { +void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, + unsigned Idx) { assert(SU->getInstr() && "Scheduled SUnit must have instr"); #ifndef NDEBUG @@ -2102,11 +2101,19 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { // Check for interlocks first. For the purpose of other heuristics, an // instruction that cannot issue appears as if it's not in the ReadyQueue. bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; - if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) || - Available.size() >= ReadyListLimit) - Pending.push(SU); - else + bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) || + checkHazard(SU) || (Available.size() >= ReadyListLimit); + + if (!HazardDetected) { Available.push(SU); + + if (InPQueue) + Pending.remove(Pending.begin() + Idx); + return; + } + + if (!InPQueue) + Pending.push(SU); } /// Move the boundary of scheduled code by one cycle. @@ -2346,26 +2353,21 @@ void SchedBoundary::releasePending() { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. 
- bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; - for (unsigned i = 0, e = Pending.size(); i != e; ++i) { - SUnit *SU = *(Pending.begin()+i); + for (unsigned I = 0, E = Pending.size(); I < E; ++I) { + SUnit *SU = *(Pending.begin() + I); unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; - if (!IsBuffered && ReadyCycle > CurrCycle) - continue; - - if (checkHazard(SU)) - continue; - if (Available.size() >= ReadyListLimit) break; - Available.push(SU); - Pending.remove(Pending.begin()+i); - --i; --e; + releaseNode(SU, ReadyCycle, true, I); + if (E != Pending.size()) { + --I; + --E; + } } CheckPending = false; } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 27a2e7023f22..a4ba197b7a1d 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -15,6 +15,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -38,6 +40,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" @@ -105,6 +108,25 @@ namespace { using AllSuccsCache = std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>; + /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is + /// post-dominated by another DBG_VALUE of the same variable location. + /// This is necessary to detect sequences such as: + /// %0 = someinst + /// DBG_VALUE %0, !123, !DIExpression() + /// %1 = anotherinst + /// DBG_VALUE %1, !123, !DIExpression() + /// Where if %0 were to sink, the DBG_VAUE should not sink with it, as that + /// would re-order assignments. + using SeenDbgUser = PointerIntPair<MachineInstr *, 1>; + + /// Record of DBG_VALUE uses of vregs in a block, so that we can identify + /// debug instructions to sink. + SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers; + + /// Record of debug variables that have had their locations set in the + /// current block. + DenseSet<DebugVariable> SeenDbgVars; + public: static char ID; // Pass identification @@ -132,6 +154,7 @@ namespace { private: bool ProcessBlock(MachineBasicBlock &MBB); + void ProcessDbgInst(MachineInstr &MI); bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To); @@ -153,8 +176,14 @@ namespace { MachineBasicBlock *To, bool BreakPHIEdge); bool SinkInstruction(MachineInstr &MI, bool &SawStore, - AllSuccsCache &AllSuccessors); + + /// If we sink a COPY inst, some debug users of it's destination may no + /// longer be dominated by the COPY, and will eventually be dropped. + /// This is easily rectified by forwarding the non-dominated debug uses + /// to the copy source. 
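A minimal standalone sketch of the forwarding idea described in the comment above (not the pass itself: registers are bare integers and dominance is reduced to a block-index comparison purely for illustration; the real code consults the dominator tree and the COPY's actual operands):

#include <cstdio>
#include <vector>

// A debug use of a virtual register, tagged with the block it lives in.
struct DbgUse { int Reg; int Block; };

int main() {
  const int CopyDst = 1, CopySrc = 0; // the sunk instruction: %1 = COPY %0
  const int SinkToBlock = 2;          // the block the COPY is sunk into
  std::vector<DbgUse> Uses = {{CopyDst, 0}, {CopyDst, 2}};

  // Any user of %1 left in a block the sink target does not dominate is
  // pointed at the copy source instead of eventually being dropped.
  for (DbgUse &U : Uses)
    if (U.Reg == CopyDst && U.Block < SinkToBlock)
      U.Reg = CopySrc;

  for (const DbgUse &U : Uses)
    std::printf("DBG_VALUE %%%d in bb.%d\n", U.Reg, U.Block);
  // -> the use in bb.0 now reads %0; the use in bb.2 still reads %1.
}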
+ void SalvageUnsunkDebugUsersOfCopy(MachineInstr &, + MachineBasicBlock *TargetBlock); bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const; @@ -367,8 +396,11 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (!ProcessedBegin) --I; - if (MI.isDebugInstr()) + if (MI.isDebugInstr()) { + if (MI.isDebugValue()) + ProcessDbgInst(MI); continue; + } bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); if (Joined) { @@ -384,9 +416,29 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { // If we just processed the first instruction in the block, we're done. } while (!ProcessedBegin); + SeenDbgUsers.clear(); + SeenDbgVars.clear(); + return MadeChange; } +void MachineSinking::ProcessDbgInst(MachineInstr &MI) { + // When we see DBG_VALUEs for registers, record any vreg it reads, so that + // we know what to sink if the vreg def sinks. + assert(MI.isDebugValue() && "Expected DBG_VALUE for processing"); + + DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), + MI.getDebugLoc()->getInlinedAt()); + bool SeenBefore = SeenDbgVars.count(Var) != 0; + + MachineOperand &MO = MI.getOperand(0); + if (MO.isReg() && MO.getReg().isVirtual()) + SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore)); + + // Record the variable for any DBG_VALUE, to avoid re-ordering any of them. + SeenDbgVars.insert(Var); +} + bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To) { @@ -731,18 +783,60 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, MBP.LHS.getReg() == BaseOp->getReg(); } -/// Sink an instruction and its associated debug instructions. If the debug -/// instructions to be sunk are already known, they can be provided in DbgVals. +/// If the sunk instruction is a copy, try to forward the copy instead of +/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if +/// there's any subregister weirdness involved. Returns true if copy +/// propagation occurred. +static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI) { + const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo(); + const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo(); + + // Copy DBG_VALUE operand and set the original to undef. We then check to + // see whether this is something that can be copy-forwarded. If it isn't, + // continue around the loop. + MachineOperand DbgMO = DbgMI.getOperand(0); + + const MachineOperand *SrcMO = nullptr, *DstMO = nullptr; + auto CopyOperands = TII.isCopyInstr(SinkInst); + if (!CopyOperands) + return false; + SrcMO = CopyOperands->Source; + DstMO = CopyOperands->Destination; + + // Check validity of forwarding this copy. + bool PostRA = MRI.getNumVirtRegs() == 0; + + // Trying to forward between physical and virtual registers is too hard. + if (DbgMO.getReg().isVirtual() != SrcMO->getReg().isVirtual()) + return false; + + // Only try virtual register copy-forwarding before regalloc, and physical + // register copy-forwarding after regalloc. + bool arePhysRegs = !DbgMO.getReg().isVirtual(); + if (arePhysRegs != PostRA) + return false; + + // Pre-regalloc, only forward if all subregisters agree (or there are no + // subregs at all). More analysis might recover some forwardable copies. 
+ if (!PostRA && (DbgMO.getSubReg() != SrcMO->getSubReg() || + DbgMO.getSubReg() != DstMO->getSubReg())) + return false; + + // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register + // of this copy. Only forward the copy if the DBG_VALUE operand exactly + // matches the copy destination. + if (PostRA && DbgMO.getReg() != DstMO->getReg()) + return false; + + DbgMI.getOperand(0).setReg(SrcMO->getReg()); + DbgMI.getOperand(0).setSubReg(SrcMO->getSubReg()); + return true; +} + +/// Sink an instruction and its associated debug instructions. static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, - SmallVectorImpl<MachineInstr *> *DbgVals = nullptr) { - // If debug values are provided use those, otherwise call collectDebugValues. - SmallVector<MachineInstr *, 2> DbgValuesToSink; - if (DbgVals) - DbgValuesToSink.insert(DbgValuesToSink.begin(), - DbgVals->begin(), DbgVals->end()); - else - MI.collectDebugValues(DbgValuesToSink); + SmallVectorImpl<MachineInstr *> &DbgValuesToSink) { // If we cannot find a location to use (merge with), then we erase the debug // location to prevent debug-info driven tools from potentially reporting @@ -758,13 +852,19 @@ static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, SuccToSinkTo.splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); - // Move previously adjacent debug value instructions to the insert position. + // Sink a copy of debug users to the insert position. Mark the original + // DBG_VALUE location as 'undef', indicating that any earlier variable + // location should be terminated as we've optimised away the value at this + // point. for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { MachineInstr *DbgMI = *DBI; - SuccToSinkTo.splice(InsertPos, ParentBlock, DbgMI, - ++MachineBasicBlock::iterator(DbgMI)); + MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(*DBI); + SuccToSinkTo.insert(InsertPos, NewDbgMI); + + if (!attemptDebugCopyProp(MI, *DbgMI)) + DbgMI->getOperand(0).setReg(0); } } @@ -882,7 +982,36 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; - performSink(MI, *SuccToSinkTo, InsertPos); + // Collect debug users of any vreg that this inst defines. + SmallVector<MachineInstr *, 4> DbgUsersToSink; + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) + continue; + if (!SeenDbgUsers.count(MO.getReg())) + continue; + + // Sink any users that don't pass any other DBG_VALUEs for this variable. + auto &Users = SeenDbgUsers[MO.getReg()]; + for (auto &User : Users) { + MachineInstr *DbgMI = User.getPointer(); + if (User.getInt()) { + // This DBG_VALUE would re-order assignments. If we can't copy-propagate + // it, it can't be recovered. Set it undef. + if (!attemptDebugCopyProp(MI, *DbgMI)) + DbgMI->getOperand(0).setReg(0); + } else { + DbgUsersToSink.push_back(DbgMI); + } + } + } + + // After sinking, some debug users may not be dominated any more. If possible, + // copy-propagate their operands. As it's expensive, don't do this if there's + // no debuginfo in the program. 
+ if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy()) + SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo); + + performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. @@ -897,6 +1026,41 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, return true; } +void MachineSinking::SalvageUnsunkDebugUsersOfCopy( + MachineInstr &MI, MachineBasicBlock *TargetBlock) { + assert(MI.isCopy()); + assert(MI.getOperand(1).isReg()); + + // Enumerate all users of vreg operands that are def'd. Skip those that will + // be sunk. For the rest, if they are not dominated by the block we will sink + // MI into, propagate the copy source to them. + SmallVector<MachineInstr *, 4> DbgDefUsers; + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); + for (auto &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) + continue; + for (auto &User : MRI.use_instructions(MO.getReg())) { + if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent())) + continue; + + // If is in same block, will either sink or be use-before-def. + if (User.getParent() == MI.getParent()) + continue; + + assert(User.getOperand(0).isReg() && + "DBG_VALUE user of vreg, but non reg operand?"); + DbgDefUsers.push_back(&User); + } + } + + // Point the users of this copy that are no longer dominated, at the source + // of the copy. + for (auto *User : DbgDefUsers) { + User->getOperand(0).setReg(MI.getOperand(1).getReg()); + User->getOperand(0).setSubReg(MI.getOperand(1).getSubReg()); + } +} + //===----------------------------------------------------------------------===// // This pass is not intended to be a replacement or a complete alternative // for the pre-ra machine sink pass. It is only designed to sink COPY @@ -1051,10 +1215,14 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S) SuccBB->removeLiveIn(*S); for (auto U : UsedOpsInCopy) { - Register Reg = MI->getOperand(U).getReg(); - if (!SuccBB->isLiveIn(Reg)) - SuccBB->addLiveIn(Reg); + Register SrcReg = MI->getOperand(U).getReg(); + LaneBitmask Mask; + for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) { + Mask |= (*S).second; + } + SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll()); } + SuccBB->sortUniqueLiveIns(); } static bool hasRegisterDependency(MachineInstr *MI, @@ -1206,7 +1374,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // block. clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); - performSink(*MI, *SuccBB, InsertPos, &DbgValsToSink); + performSink(*MI, *SuccBB, InsertPos, DbgValsToSink); updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); Changed = true; diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp new file mode 100644 index 000000000000..aff67f9cfd55 --- /dev/null +++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp @@ -0,0 +1,122 @@ +//===- MachineSizeOpts.cpp - code size optimization related code ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains some shared machine IR code size optimization related +// code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineSizeOpts.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" + +using namespace llvm; + +extern cl::opt<bool> EnablePGSO; +extern cl::opt<bool> PGSOLargeWorkingSetSizeOnly; +extern cl::opt<bool> ForcePGSO; +extern cl::opt<int> PgsoCutoffInstrProf; +extern cl::opt<int> PgsoCutoffSampleProf; + +namespace machine_size_opts_detail { + +/// Like ProfileSummaryInfo::isColdBlock but for MachineBasicBlock. +bool isColdBlock(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getBlockProfileCount(MBB); + return Count && PSI->isColdCount(*Count); +} + +/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock. +static bool isHotBlockNthPercentile(int PercentileCutoff, + const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + auto Count = MBFI->getBlockProfileCount(MBB); + return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count); +} + +/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for +/// MachineFunction. +bool isFunctionColdInCallGraph( + const MachineFunction *MF, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo &MBFI) { + if (auto FunctionCount = MF->getFunction().getEntryCount()) + if (!PSI->isColdCount(FunctionCount.getCount())) + return false; + for (const auto &MBB : *MF) + if (!isColdBlock(&MBB, PSI, &MBFI)) + return false; + return true; +} + +/// Like ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile but for +/// MachineFunction. 
+bool isFunctionHotInCallGraphNthPercentile( + int PercentileCutoff, + const MachineFunction *MF, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo &MBFI) { + if (auto FunctionCount = MF->getFunction().getEntryCount()) + if (PSI->isHotCountNthPercentile(PercentileCutoff, + FunctionCount.getCount())) + return true; + for (const auto &MBB : *MF) + if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI)) + return true; + return false; +} +} // namespace machine_size_opts_detail + +namespace { +struct MachineBasicBlockBFIAdapter { + static bool isFunctionColdInCallGraph(const MachineFunction *MF, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo &MBFI) { + return machine_size_opts_detail::isFunctionColdInCallGraph(MF, PSI, MBFI); + } + static bool isFunctionHotInCallGraphNthPercentile( + int CutOff, + const MachineFunction *MF, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo &MBFI) { + return machine_size_opts_detail::isFunctionHotInCallGraphNthPercentile( + CutOff, MF, PSI, MBFI); + } + static bool isColdBlock(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI); + } + static bool isHotBlockNthPercentile(int CutOff, + const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI) { + return machine_size_opts_detail::isHotBlockNthPercentile( + CutOff, MBB, PSI, MBFI); + } +}; +} // end anonymous namespace + +bool llvm::shouldOptimizeForSize(const MachineFunction *MF, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + PGSOQueryType QueryType) { + return shouldFuncOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>( + MF, PSI, MBFI, QueryType); +} + +bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB, + ProfileSummaryInfo *PSI, + const MachineBlockFrequencyInfo *MBFI, + PGSOQueryType QueryType) { + return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>( + MBB, PSI, MBFI, QueryType); +} diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 66a3bc2f8cc4..e6b51b7e1e56 100644 --- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 969743edca52..6c0402df8489 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -59,6 +59,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrDesc.h" @@ -123,8 +124,8 @@ namespace { void addRegWithSubRegs(RegVector &RV, unsigned Reg) { RV.push_back(Reg); if (Register::isPhysicalRegister(Reg)) - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - RV.push_back(*SubRegs); + for (const MCPhysReg &SubReg : TRI->subregs(Reg)) + RV.push_back(SubReg); } struct BBInfo { @@ -801,18 +802,16 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB live-in list contains non-physical register", MBB); continue; } - for (MCSubRegIterator 
SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - regsLive.insert(*SubRegs); + for (const MCPhysReg &SubReg : TRI->subregs_inclusive(LI.PhysReg)) + regsLive.insert(SubReg); } } const MachineFrameInfo &MFI = MF->getFrameInfo(); BitVector PR = MFI.getPristineRegs(*MF); for (unsigned I : PR.set_bits()) { - for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - regsLive.insert(*SubRegs); + for (const MCPhysReg &SubReg : TRI->subregs_inclusive(I)) + regsLive.insert(SubReg); } regsKilled.clear(); @@ -1100,7 +1099,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - case TargetOpcode::G_GEP: { + case TargetOpcode::G_PTR_ADD: { LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); LLT PtrTy = MRI->getType(MI->getOperand(1).getReg()); LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg()); @@ -1408,18 +1407,6 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } - const Constant *Mask = MaskOp.getShuffleMask(); - auto *MaskVT = dyn_cast<VectorType>(Mask->getType()); - if (!MaskVT || !MaskVT->getElementType()->isIntegerTy(32)) { - report("Invalid shufflemask constant type", MI); - break; - } - - if (!Mask->getAggregateElement(0u)) { - report("Invalid shufflemask constant type", MI); - break; - } - LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); LLT Src0Ty = MRI->getType(MI->getOperand(1).getReg()); LLT Src1Ty = MRI->getType(MI->getOperand(2).getReg()); @@ -1435,8 +1422,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1; int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1; - SmallVector<int, 32> MaskIdxes; - ShuffleVectorInst::getShuffleMask(Mask, MaskIdxes); + ArrayRef<int> MaskIdxes = MaskOp.getShuffleMask(); if (static_cast<int>(MaskIdxes.size()) != DstNumElts) report("Wrong result type for shufflemask", MI); @@ -1609,13 +1595,23 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } else if (MONum < MCID.getNumOperands()) { const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; // Don't check if it's the last operand in a variadic instruction. See, - // e.g., LDM_RET in the arm back end. - if (MO->isReg() && - !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { - if (MO->isDef() && !MCOI.isOptionalDef()) - report("Explicit operand marked as def", MO, MONum); - if (MO->isImplicit()) - report("Explicit operand marked as implicit", MO, MONum); + // e.g., LDM_RET in the arm back end. Check non-variadic operands only. + bool IsOptional = MI->isVariadic() && MONum == MCID.getNumOperands() - 1; + if (!IsOptional) { + if (MO->isReg()) { + if (MO->isDef() && !MCOI.isOptionalDef()) + report("Explicit operand marked as def", MO, MONum); + if (MO->isImplicit()) + report("Explicit operand marked as implicit", MO, MONum); + } + + // Check that an instruction has register operands only as expected. 
+ if (MCOI.OperandType == MCOI::OPERAND_REGISTER && + !MO->isReg() && !MO->isFI()) + report("Expected a register operand.", MO, MONum); + if ((MCOI.OperandType == MCOI::OPERAND_IMMEDIATE || + MCOI.OperandType == MCOI::OPERAND_PCREL) && MO->isReg()) + report("Expected a non-register operand.", MO, MONum); } int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO); @@ -2005,9 +2001,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { bool Bad = !isReserved(Reg); // We are fine if just any subregister has a defined value. if (Bad) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); - ++SubRegs) { - if (regsLive.count(*SubRegs)) { + + for (const MCPhysReg &SubReg : TRI->subregs(Reg)) { + if (regsLive.count(SubReg)) { Bad = false; break; } @@ -2025,9 +2021,8 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (!Register::isPhysicalRegister(MOP.getReg())) continue; - for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); - ++SubRegs) { - if (*SubRegs == Reg) { + for (const MCPhysReg &SubReg : TRI->subregs(MOP.getReg())) { + if (SubReg == Reg) { Bad = false; break; } @@ -2304,6 +2299,32 @@ void MachineVerifier::visitMachineFunctionAfter() { if (LiveInts) verifyLiveIntervals(); + // Check live-in list of each MBB. If a register is live into MBB, check + // that the register is in regsLiveOut of each predecessor block. Since + // this must come from a definition in the predecesssor or its live-in + // list, this will catch a live-through case where the predecessor does not + // have the register in its live-in list. This currently only checks + // registers that have no aliases, are not allocatable and are not + // reserved, which could mean a condition code register for instance. 
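The live-in rule added further down can be pictured with a small standalone model (hypothetical block and register numbers, not verifier code; the real check also skips allocatable, reserved, and aliased registers as its comment explains): a register listed as live-in to a block must appear in the live-out set of every predecessor, otherwise a report is emitted.

#include <cstdio>
#include <set>
#include <vector>

// Toy blocks with explicit live-in/live-out sets and predecessor lists.
struct Block { std::set<int> LiveIns, LiveOuts; std::vector<int> Preds; };

int main() {
  std::vector<Block> Blocks(2);
  Blocks[1].LiveIns = {42}; // bb.1 claims register 42 (say, a condition code)
  Blocks[1].Preds = {0};    // ...but bb.0 never lists it as live out.

  for (size_t I = 0; I < Blocks.size(); ++I)
    for (int Reg : Blocks[I].LiveIns)
      for (int Pred : Blocks[I].Preds)
        if (!Blocks[Pred].LiveOuts.count(Reg))
          std::printf("bb.%zu: live-in %d not live out of predecessor bb.%d\n",
                      I, Reg, Pred);
  return 0;
}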
+ if (MRI->tracksLiveness()) + for (const auto &MBB : *MF) + for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) { + MCPhysReg LiveInReg = P.PhysReg; + bool hasAliases = MCRegAliasIterator(LiveInReg, TRI, false).isValid(); + if (hasAliases || isAllocatable(LiveInReg) || isReserved(LiveInReg)) + continue; + for (const MachineBasicBlock *Pred : MBB.predecessors()) { + BBInfo &PInfo = MBBInfoMap[Pred]; + if (!PInfo.regsLiveOut.count(LiveInReg)) { + report("Live in register not found to be live out from predecessor.", + &MBB); + errs() << TRI->getName(LiveInReg) + << " not found to be live out from " + << printMBBReference(*Pred) << "\n"; + } + } + } + for (auto CSInfo : MF->getCallSitesInfo()) if (!CSInfo.first->isCall()) report("Call site info referencing instruction that is not call", MF); diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp index d21eae222af0..d2ee21c8720f 100644 --- a/llvm/lib/CodeGen/MacroFusion.cpp +++ b/llvm/lib/CodeGen/MacroFusion.cpp @@ -36,6 +36,21 @@ static bool isHazard(const SDep &Dep) { return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output; } +static SUnit *getPredClusterSU(const SUnit &SU) { + for (const SDep &SI : SU.Preds) + if (SI.isCluster()) + return SI.getSUnit(); + + return nullptr; +} + +static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) { + unsigned Num = 1; + const SUnit *CurrentSU = &SU; + while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num ++; + return Num < FuseLimit; +} + static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU) { // Check that neither instr is already paired with another along the edge @@ -56,6 +71,14 @@ static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, if (!DAG.addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster))) return false; + // TODO - If we want to chain more than two instructions, we need to create + // artifical edges to make dependencies from the FirstSU also dependent + // on other chained instructions, and other chained instructions also + // dependent on the dependencies of the SecondSU, to prevent them from being + // scheduled into these chained instructions. + assert(hasLessThanNumFused(FirstSU, 2) && + "Currently we only support chaining together two instructions"); + // Adjust the latency between both instrs. for (SDep &SI : FirstSU.Succs) if (SI.getSUnit() == &SecondSU) @@ -161,8 +184,10 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) if (DepSU.isBoundaryNode()) continue; + // Only chain two instructions together at most. 
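To make the two-instruction limit concrete, a small standalone model of the chain-length walk (hand-rolled structs, not ScheduleDAG types): a node that already sits behind a fused predecessor reports a full chain when the limit is two, so a further fusion candidate is rejected.

#include <cstdio>

// Toy node: ClusterPred is the instruction this one is already fused behind,
// mirroring the single cluster edge the helper above follows.
struct Node { const Node *ClusterPred = nullptr; };

static bool hasLessThanNumFused(const Node &N, unsigned FuseLimit) {
  unsigned Num = 1;
  const Node *Cur = &N;
  while ((Cur = Cur->ClusterPred) && Num < FuseLimit)
    ++Num;
  return Num < FuseLimit;
}

int main() {
  Node A, B, C;
  B.ClusterPred = &A; // A and B already form a fused pair; C is standalone.
  std::printf("room to fuse behind C? %s\n",
              hasLessThanNumFused(C, 2) ? "yes" : "no"); // yes
  std::printf("room to fuse behind B? %s\n",
              hasLessThanNumFused(B, 2) ? "yes" : "no"); // no, pair is full
}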
const MachineInstr *DepMI = DepSU.getInstr(); - if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) + if (!hasLessThanNumFused(DepSU, 2) || + !shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) continue; if (fuseInstructionPair(DAG, DepSU, AnchorSU)) diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 7ce3c5861801..163e52d9199d 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1189,7 +1190,7 @@ void ModuloScheduleExpander::rewriteScheduledInstr( bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) { if (!Phi.isPHI()) return false; - unsigned DefCycle = Schedule.getCycle(&Phi); + int DefCycle = Schedule.getCycle(&Phi); int DefStage = Schedule.getStage(&Phi); unsigned InitVal = 0; @@ -1198,7 +1199,7 @@ bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) { MachineInstr *Use = MRI.getVRegDef(LoopVal); if (!Use || Use->isPHI()) return true; - unsigned LoopCycle = Schedule.getCycle(Use); + int LoopCycle = Schedule.getCycle(Use); int LoopStage = Schedule.getStage(Use); return (LoopCycle > DefCycle) || (LoopStage <= DefStage); } @@ -1214,7 +1215,7 @@ namespace { // Remove any dead phis in MBB. Dead phis either have only one block as input // (in which case they are the identity) or have no uses. void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, - LiveIntervals *LIS) { + LiveIntervals *LIS, bool KeepSingleSrcPhi = false) { bool Changed = true; while (Changed) { Changed = false; @@ -1226,7 +1227,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LIS->RemoveMachineInstrFromMaps(MI); MI.eraseFromParent(); Changed = true; - } else if (MI.getNumExplicitOperands() == 3) { + } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) { MRI.constrainRegClass(MI.getOperand(1).getReg(), MRI.getRegClass(MI.getOperand(0).getReg())); MRI.replaceRegWith(MI.getOperand(0).getReg(), @@ -1582,6 +1583,133 @@ PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) { return NewBB; } +void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB, + int MinStage) { + for (auto I = MB->getFirstInstrTerminator()->getReverseIterator(); + I != std::next(MB->getFirstNonPHI()->getReverseIterator());) { + MachineInstr *MI = &*I++; + int Stage = getStage(MI); + if (Stage == -1 || Stage >= MinStage) + continue; + + for (MachineOperand &DefMO : MI->defs()) { + SmallVector<std::pair<MachineInstr *, Register>, 4> Subs; + for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) { + // Only PHIs can use values from this block by construction. + // Match with the equivalent PHI in B. 
+ assert(UseMI.isPHI()); + Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(), + MI->getParent()); + Subs.emplace_back(&UseMI, Reg); + } + for (auto &Sub : Subs) + Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0, + *MRI.getTargetRegisterInfo()); + } + if (LIS) + LIS->RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + } +} + +void PeelingModuloScheduleExpander::moveStageBetweenBlocks( + MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) { + auto InsertPt = DestBB->getFirstNonPHI(); + DenseMap<Register, Register> Remaps; + for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) { + MachineInstr *MI = &*I++; + if (MI->isPHI()) { + // This is an illegal PHI. If we move any instructions using an illegal + // PHI, we need to create a legal Phi + Register PhiR = MI->getOperand(0).getReg(); + auto RC = MRI.getRegClass(PhiR); + Register NR = MRI.createVirtualRegister(RC); + MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(), DebugLoc(), + TII->get(TargetOpcode::PHI), NR) + .addReg(PhiR) + .addMBB(SourceBB); + BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI; + CanonicalMIs[NI] = CanonicalMIs[MI]; + Remaps[PhiR] = NR; + continue; + } + if (getStage(MI) != Stage) + continue; + MI->removeFromParent(); + DestBB->insert(InsertPt, MI); + auto *KernelMI = CanonicalMIs[MI]; + BlockMIs[{DestBB, KernelMI}] = MI; + BlockMIs.erase({SourceBB, KernelMI}); + } + SmallVector<MachineInstr *, 4> PhiToDelete; + for (MachineInstr &MI : DestBB->phis()) { + assert(MI.getNumOperands() == 3); + MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg()); + // If the instruction referenced by the phi is moved inside the block + // we don't need the phi anymore. + if (getStage(Def) == Stage) { + Register PhiReg = MI.getOperand(0).getReg(); + MRI.replaceRegWith(MI.getOperand(0).getReg(), + Def->getOperand(0).getReg()); + MI.getOperand(0).setReg(PhiReg); + PhiToDelete.push_back(&MI); + } + } + for (auto *P : PhiToDelete) + P->eraseFromParent(); + InsertPt = DestBB->getFirstNonPHI(); + // Helper to clone Phi instructions into the destination block. We clone Phi + // greedily to avoid combinatorial explosion of Phi instructions. + auto clonePhi = [&](MachineInstr *Phi) { + MachineInstr *NewMI = MF.CloneMachineInstr(Phi); + DestBB->insert(InsertPt, NewMI); + Register OrigR = Phi->getOperand(0).getReg(); + Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR)); + NewMI->getOperand(0).setReg(R); + NewMI->getOperand(1).setReg(OrigR); + NewMI->getOperand(2).setMBB(*DestBB->pred_begin()); + Remaps[OrigR] = R; + CanonicalMIs[NewMI] = CanonicalMIs[Phi]; + BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI; + PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi]; + return R; + }; + for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) { + for (MachineOperand &MO : I->uses()) { + if (!MO.isReg()) + continue; + if (Remaps.count(MO.getReg())) + MO.setReg(Remaps[MO.getReg()]); + else { + // If we are using a phi from the source block we need to add a new phi + // pointing to the old one. 
+ MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg()); + if (Use && Use->isPHI() && Use->getParent() == SourceBB) { + Register R = clonePhi(Use); + MO.setReg(R); + } + } + } + } +} + +Register +PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi, + MachineInstr *Phi) { + unsigned distance = PhiNodeLoopIteration[Phi]; + MachineInstr *CanonicalUse = CanonicalPhi; + for (unsigned I = 0; I < distance; ++I) { + assert(CanonicalUse->isPHI()); + assert(CanonicalUse->getNumOperands() == 5); + unsigned LoopRegIdx = 3, InitRegIdx = 1; + if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent()) + std::swap(LoopRegIdx, InitRegIdx); + CanonicalUse = + MRI.getVRegDef(CanonicalUse->getOperand(LoopRegIdx).getReg()); + } + return CanonicalUse->getOperand(0).getReg(); +} + void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { BitVector LS(Schedule.getNumStages(), true); BitVector AS(Schedule.getNumStages(), true); @@ -1604,26 +1732,45 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { // property that any value deffed in BB but used outside of BB is used by a // PHI in the exiting block. MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock(); - + EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true); // Push out the epilogs, again in reverse order. // We can't assume anything about the minumum loop trip count at this point, - // so emit a fairly complex epilog: - // K[0, 1, 2] // Kernel runs stages 0, 1, 2 - // E0[2] <- P1 // Epilog runs stage 2 only, so the state after is [0]. - // E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2. - // - // This creates a single-successor single-predecessor sequence of blocks for - // each epilog, which are kept this way for simplicity at this stage and - // cleaned up by the optimizer later. + // so emit a fairly complex epilog. + + // We first peel number of stages minus one epilogue. Then we remove dead + // stages and reorder instructions based on their stage. If we have 3 stages + // we generate first: + // E0[3, 2, 1] + // E1[3', 2'] + // E2[3''] + // And then we move instructions based on their stages to have: + // E0[3] + // E1[2, 3'] + // E2[1, 2', 3''] + // The transformation is legal because we only move instructions past + // instructions of a previous loop iteration. for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) { - Epilogs.push_back(nullptr); - for (int J = Schedule.getNumStages() - 1; J >= I; --J) { - LS.reset(); - LS[J] = 1; - Epilogs.back() = peelKernel(LPD_Back); - LiveStages[Epilogs.back()] = LS; - AvailableStages[Epilogs.back()] = AS; + Epilogs.push_back(peelKernel(LPD_Back)); + MachineBasicBlock *B = Epilogs.back(); + filterInstructions(B, Schedule.getNumStages() - I); + // Keep track at which iteration each phi belongs to. We need it to know + // what version of the variable to use during prologue/epilogue stitching. + EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true); + for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi) + PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I; + } + for (size_t I = 0; I < Epilogs.size(); I++) { + LS.reset(); + for (size_t J = I; J < Epilogs.size(); J++) { + int Iteration = J; + unsigned Stage = Schedule.getNumStages() - 1 + I - J; + // Move stage one block at a time so that Phi nodes are updated correctly. 
+ for (size_t K = Iteration; K > I; K--) + moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage); + LS[Stage] = 1; } + LiveStages[Epilogs[I]] = LS; + AvailableStages[Epilogs[I]] = AS; } // Now we've defined all the prolog and epilog blocks as a fallthrough @@ -1638,8 +1785,16 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { for (MachineInstr &MI : (*EI)->phis()) { Register Reg = MI.getOperand(1).getReg(); MachineInstr *Use = MRI.getUniqueVRegDef(Reg); - if (Use && Use->getParent() == Pred) + if (Use && Use->getParent() == Pred) { + MachineInstr *CanonicalUse = CanonicalMIs[Use]; + if (CanonicalUse->isPHI()) { + // If the use comes from a phi we need to skip as many phi as the + // distance between the epilogue and the kernel. Trace through the phi + // chain to find the right value. + Reg = getPhiCanonicalReg(CanonicalUse, Use); + } Reg = getEquivalentRegisterIn(Reg, *PI); + } MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false)); MI.addOperand(MachineOperand::CreateMBB(*PI)); } @@ -1659,6 +1814,13 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() { rewriteUsesOf(MI); } } + for (auto *MI : IllegalPhisToDelete) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + } + IllegalPhisToDelete.clear(); + // Now all remapping has been done, we're free to optimize the generated code. for (MachineBasicBlock *B : reverse(Blocks)) EliminateDeadPhis(B, MRI, LIS); @@ -1727,9 +1889,10 @@ void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) { R = MI->getOperand(1).getReg(); MRI.setRegClass(R, MRI.getRegClass(PhiR)); MRI.replaceRegWith(PhiR, R); - if (LIS) - LIS->RemoveMachineInstrFromMaps(*MI); - MI->eraseFromParent(); + // Postpone deleting the Phi as it may be referenced by BlockMIs and used + // later to figure out how to remap registers. + MI->getOperand(0).setReg(PhiR); + IllegalPhisToDelete.push_back(MI); return; } @@ -1759,10 +1922,6 @@ void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) { } void PeelingModuloScheduleExpander::fixupBranches() { - std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info = - TII->analyzeLoopForPipelining(BB); - assert(Info); - // Work outwards from the kernel. bool KernelDisposed = false; int TC = Schedule.getNumStages() - 1; @@ -1818,6 +1977,8 @@ void PeelingModuloScheduleExpander::expand() { BB = Schedule.getLoop()->getTopBlock(); Preheader = Schedule.getLoop()->getLoopPreheader(); LLVM_DEBUG(Schedule.dump()); + Info = TII->analyzeLoopForPipelining(BB); + assert(Info); rewriteKernel(); peelPrologAndEpilogs(); diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp new file mode 100644 index 000000000000..9ed3471c0fc9 --- /dev/null +++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp @@ -0,0 +1,54 @@ +//===-- NonRelocatableStringpool.cpp --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/NonRelocatableStringpool.h" + +namespace llvm { + +DwarfStringPoolEntryRef NonRelocatableStringpool::getEntry(StringRef S) { + if (S.empty() && !Strings.empty()) + return EmptyString; + + if (Translator) + S = Translator(S); + auto I = Strings.insert({S, DwarfStringPoolEntry()}); + auto &Entry = I.first->second; + if (I.second || !Entry.isIndexed()) { + Entry.Index = NumEntries++; + Entry.Offset = CurrentEndOffset; + Entry.Symbol = nullptr; + CurrentEndOffset += S.size() + 1; + } + return DwarfStringPoolEntryRef(*I.first, true); +} + +StringRef NonRelocatableStringpool::internString(StringRef S) { + DwarfStringPoolEntry Entry{nullptr, 0, DwarfStringPoolEntry::NotIndexed}; + + if (Translator) + S = Translator(S); + + auto InsertResult = Strings.insert({S, Entry}); + return InsertResult.first->getKey(); +} + +std::vector<DwarfStringPoolEntryRef> +NonRelocatableStringpool::getEntriesForEmission() const { + std::vector<DwarfStringPoolEntryRef> Result; + Result.reserve(Strings.size()); + for (const auto &E : Strings) + if (E.getValue().isIndexed()) + Result.emplace_back(E, true); + llvm::sort(Result, [](const DwarfStringPoolEntryRef A, + const DwarfStringPoolEntryRef B) { + return A.getIndex() < B.getIndex(); + }); + return Result; +} + +} // namespace llvm diff --git a/llvm/lib/CodeGen/OptimizePHIs.cpp b/llvm/lib/CodeGen/OptimizePHIs.cpp index 1a493964e678..02a70ab801e9 100644 --- a/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include <cassert> diff --git a/llvm/lib/CodeGen/ParallelCG.cpp b/llvm/lib/CodeGen/ParallelCG.cpp index e4c73658cb4f..7dbd830666fb 100644 --- a/llvm/lib/CodeGen/ParallelCG.cpp +++ b/llvm/lib/CodeGen/ParallelCG.cpp @@ -26,7 +26,7 @@ using namespace llvm; static void codegen(Module *M, llvm::raw_pwrite_stream &OS, function_ref<std::unique_ptr<TargetMachine>()> TMFactory, - TargetMachine::CodeGenFileType FileType) { + CodeGenFileType FileType) { std::unique_ptr<TargetMachine> TM = TMFactory(); legacy::PassManager CodeGenPasses; if (TM->addPassesToEmitFile(CodeGenPasses, OS, nullptr, FileType)) @@ -38,7 +38,7 @@ std::unique_ptr<Module> llvm::splitCodeGen( std::unique_ptr<Module> M, ArrayRef<llvm::raw_pwrite_stream *> OSs, ArrayRef<llvm::raw_pwrite_stream *> BCOSs, const std::function<std::unique_ptr<TargetMachine>()> &TMFactory, - TargetMachine::CodeGenFileType FileType, bool PreserveLocals) { + CodeGenFileType FileType, bool PreserveLocals) { assert(BCOSs.empty() || BCOSs.size() == OSs.size()); if (OSs.size() == 1) { diff --git a/llvm/lib/CodeGen/PatchableFunction.cpp b/llvm/lib/CodeGen/PatchableFunction.cpp index 529fde84e39a..1d6069c50554 100644 --- a/llvm/lib/CodeGen/PatchableFunction.cpp +++ b/llvm/lib/CodeGen/PatchableFunction.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" using namespace llvm; @@ -54,6 +55,15 @@ static bool doesNotGeneratecode(const MachineInstr &MI) { } bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) { + if (MF.getFunction().hasFnAttribute("patchable-function-entry")) { + 
MachineBasicBlock &FirstMBB = *MF.begin(); + MachineInstr &FirstMI = *FirstMBB.begin(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(), + TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER)); + return true; + } + if (!MF.getFunction().hasFnAttribute("patchable-function")) return false; diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 54f1d38ed106..c9c279cf0ddf 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -84,6 +84,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" diff --git a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp index 0a3838617bc5..4f88f4d3dd6a 100644 --- a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp index 5bea9f2893c9..d68959935cec 100644 --- a/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -77,7 +78,7 @@ AntiDepBreaker::~AntiDepBreaker() { } namespace { class PostRAScheduler : public MachineFunctionPass { - const TargetInstrInfo *TII; + const TargetInstrInfo *TII = nullptr; RegisterClassInfo RegClassInfo; public: diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp index 0d2f6f99ca96..1ff4e7cbd8fb 100644 --- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp @@ -12,14 +12,16 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/PreISelIntrinsicLowering.h" +#include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" @@ -56,6 +58,17 @@ static bool lowerLoadRelative(Function &F) { return Changed; } +// ObjCARC has knowledge about whether an obj-c runtime function needs to be +// always tail-called or never tail-called. 
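The std::max combination used below relies on the ordering of CallInst::TailCallKind. A standalone sketch of how the call site's own kind and ObjCARC's requirement merge; the enum values are assumed here to mirror LLVM's usual TCK_None < TCK_Tail < TCK_MustTail < TCK_NoTail ordering and are written out only for illustration:

#include <algorithm>
#include <cstdio>

// Assumed ordering; treat the exact values as an illustration, not a quote
// of the LLVM headers.
enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2, TCK_NoTail = 3 };

static const char *Name(TailCallKind K) {
  static const char *Names[] = {"none", "tail", "musttail", "notail"};
  return Names[K];
}

int main() {
  // std::max implements the rule from the comment below: notail on either
  // side wins outright, and tail beats plain none.
  TailCallKind Cases[][2] = {{TCK_None, TCK_Tail},
                             {TCK_Tail, TCK_NoTail},
                             {TCK_None, TCK_None}};
  for (auto &C : Cases)
    std::printf("%s + %s -> %s\n", Name(C[0]), Name(C[1]),
                Name(std::max(C[0], C[1])));
}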
+static CallInst::TailCallKind getOverridingTailCallKind(const Function &F) { + objcarc::ARCInstKind Kind = objcarc::GetFunctionClass(&F); + if (objcarc::IsAlwaysTail(Kind)) + return CallInst::TCK_Tail; + else if (objcarc::IsNeverTail(Kind)) + return CallInst::TCK_NoTail; + return CallInst::TCK_None; +} + static bool lowerObjCCall(Function &F, const char *NewFn, bool setNonLazyBind = false) { if (F.use_empty()) @@ -75,6 +88,8 @@ static bool lowerObjCCall(Function &F, const char *NewFn, } } + CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F); + for (auto I = F.use_begin(), E = F.use_end(); I != E;) { auto *CI = cast<CallInst>(I->getUser()); assert(CI->getCalledFunction() && "Cannot lower an indirect call!"); @@ -84,7 +99,17 @@ static bool lowerObjCCall(Function &F, const char *NewFn, SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end()); CallInst *NewCI = Builder.CreateCall(FCache, Args); NewCI->setName(CI->getName()); - NewCI->setTailCallKind(CI->getTailCallKind()); + + // Try to set the most appropriate TailCallKind based on both the current + // attributes and the ones that we could get from ObjCARC's special + // knowledge of the runtime functions. + // + // std::max respects both requirements of notail and tail here: + // * notail on either the call or from ObjCARC becomes notail + // * tail on either side is stronger than none, but not notail + CallInst::TailCallKind TCK = CI->getTailCallKind(); + NewCI->setTailCallKind(std::max(TCK, OverridingTCK)); + if (!CI->use_empty()) CI->replaceAllUsesWith(NewCI); CI->eraseFromParent(); diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index 11bff45f9ad5..ed19f7448151 100644 --- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 729f06dda62b..3909b5717281 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -51,6 +51,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp index 2850033e6419..3c1f9905afd0 100644 --- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -132,8 +133,6 @@ void ReachingDefAnalysis::processBasicBlock( } bool ReachingDefAnalysis::runOnMachineFunction(MachineFunction &mf) { - if (skipFunction(mf.getFunction())) - return false; MF = &mf; TRI = MF->getSubtarget().getRegisterInfo(); @@ -189,7 +188,145 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { return LatestDef; } +MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, int PhysReg) { + return getInstFromId(MI->getParent(), getReachingDef(MI, 
PhysReg)); +} + +bool ReachingDefAnalysis::hasSameReachingDef(MachineInstr *A, MachineInstr *B, + int PhysReg) { + MachineBasicBlock *ParentA = A->getParent(); + MachineBasicBlock *ParentB = B->getParent(); + if (ParentA != ParentB) + return false; + + return getReachingDef(A, PhysReg) == getReachingDef(B, PhysReg); +} + +MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, + int InstId) { + assert(static_cast<size_t>(MBB->getNumber()) < MBBReachingDefs.size() && + "Unexpected basic block number."); + assert(InstId < static_cast<int>(MBB->size()) && + "Unexpected instruction id."); + + if (InstId < 0) + return nullptr; + + for (auto &MI : *MBB) { + if (InstIds.count(&MI) && InstIds[&MI] == InstId) + return &MI; + } + return nullptr; +} + int ReachingDefAnalysis::getClearance(MachineInstr *MI, MCPhysReg PhysReg) { assert(InstIds.count(MI) && "Unexpected machine instuction."); return InstIds[MI] - getReachingDef(MI, PhysReg); } + +void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg, + SmallVectorImpl<MachineInstr*> &Uses) { + MachineBasicBlock *MBB = Def->getParent(); + MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def); + while (++MI != MBB->end()) { + // If/when we find a new reaching def, we know that there's no more uses + // of 'Def'. + if (getReachingMIDef(&*MI, PhysReg) != Def) + return; + + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.isUse() || MO.getReg() != PhysReg) + continue; + + Uses.push_back(&*MI); + if (MO.isKill()) + return; + } + } +} + +unsigned ReachingDefAnalysis::getNumUses(MachineInstr *Def, int PhysReg) { + SmallVector<MachineInstr*, 4> Uses; + getReachingLocalUses(Def, PhysReg, Uses); + return Uses.size(); +} + +bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) { + MachineBasicBlock *MBB = MI->getParent(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + + // Yes if the register is live out of the basic block. + if (LiveRegs.contains(PhysReg)) + return true; + + // Walk backwards through the block to see if the register is live at some + // point. + for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) { + LiveRegs.stepBackward(*Last); + if (LiveRegs.contains(PhysReg)) + return InstIds[&*Last] > InstIds[MI]; + } + return false; +} + +bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, int PhysReg) { + MachineBasicBlock *MBB = MI->getParent(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return false; + + MachineInstr *Last = &MBB->back(); + int Def = getReachingDef(MI, PhysReg); + if (getReachingDef(Last, PhysReg) != Def) + return false; + + // Finally check that the last instruction doesn't redefine the register. + for (auto &MO : Last->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return false; + + return true; +} + +MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, + int PhysReg) { + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return nullptr; + + MachineInstr *Last = &MBB->back(); + int Def = getReachingDef(Last, PhysReg); + for (auto &MO : Last->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return Last; + + return Def < 0 ? 
nullptr : getInstFromId(MBB, Def); +} + +MachineInstr *ReachingDefAnalysis::getInstWithUseBefore(MachineInstr *MI, + int PhysReg) { + auto I = MachineBasicBlock::reverse_iterator(MI); + auto E = MI->getParent()->rend(); + I++; + + for ( ; I != E; I++) + for (auto &MO : I->operands()) + if (MO.isReg() && MO.isUse() && MO.getReg() == PhysReg) + return &*I; + + return nullptr; +} + +void ReachingDefAnalysis::getAllInstWithUseBefore(MachineInstr *MI, + int PhysReg, SmallVectorImpl<MachineInstr*> &Uses) { + MachineInstr *Use = nullptr; + MachineInstr *Pos = MI; + + while ((Use = getInstWithUseBefore(Pos, PhysReg))) { + Uses.push_back(Use); + Pos = Use; + } +} diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp index 44d0233604e7..89b5bcebd61c 100644 --- a/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/llvm/lib/CodeGen/RegAllocFast.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Metadata.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -82,12 +83,12 @@ namespace { /// Everything we know about a live virtual register. struct LiveReg { MachineInstr *LastUse = nullptr; ///< Last instr to use reg. - unsigned VirtReg; ///< Virtual register number. + Register VirtReg; ///< Virtual register number. MCPhysReg PhysReg = 0; ///< Currently held here. unsigned short LastOpNum = 0; ///< OpNum on LastUse. bool Dirty = false; ///< Register needs spill. - explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {} + explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {} unsigned getSparseSetIndex() const { return Register::virtReg2Index(VirtReg); @@ -128,7 +129,7 @@ namespace { /// Maps each physical register to a RegState enum or a virtual register. 
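The new ReachingDefAnalysis helpers above (getReachingMIDef, getReachingLocalUses, getClearance and friends) all reduce to one piece of bookkeeping: instructions in a block are numbered, and for every (instruction, physreg) pair the pass records the number of the closest preceding def, or -1 when the value is only live-in. A toy, self-contained sketch of that per-block computation, using plain containers and string register names rather than the LLVM API:

  #include <cassert>
  #include <string>
  #include <vector>

  // Toy instruction: the registers it defines.
  struct Inst { std::vector<std::string> Defs; };

  // For each instruction index, the most recent def of Reg strictly before it,
  // or -1 when the register is only live-in (mirrors getReachingDef's -1 case).
  std::vector<int> reachingDefs(const std::vector<Inst> &Block,
                                const std::string &Reg) {
    std::vector<int> Result(Block.size(), -1);
    int LastDef = -1;
    for (int I = 0, E = (int)Block.size(); I != E; ++I) {
      Result[I] = LastDef;
      for (const std::string &D : Block[I].Defs)
        if (D == Reg)
          LastDef = I;
    }
    return Result;
  }

  int main() {
    std::vector<Inst> Block = {{{"r0"}}, {{"r1"}}, {{"r0"}}, {{}}};
    std::vector<int> R = reachingDefs(Block, "r0");
    assert(R[1] == 0); // instruction 1 sees the def at index 0
    assert(R[3] == 2); // instruction 3 sees the redefinition at index 2
    // "Clearance" is then just the distance: InstId - reaching def id.
    assert(3 - R[3] == 1);
    return 0;
  }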
std::vector<unsigned> PhysRegState; - SmallVector<unsigned, 16> VirtDead; + SmallVector<Register, 16> VirtDead; SmallVector<MachineInstr *, 32> Coalesced; using RegUnitSet = SparseSet<uint16_t, identity<uint16_t>>; @@ -184,14 +185,14 @@ namespace { void allocateInstruction(MachineInstr &MI); void handleDebugValue(MachineInstr &MI); void handleThroughOperands(MachineInstr &MI, - SmallVectorImpl<unsigned> &VirtDead); + SmallVectorImpl<Register> &VirtDead); bool isLastUseOfLocalReg(const MachineOperand &MO) const; void addKillFlag(const LiveReg &LRI); void killVirtReg(LiveReg &LR); - void killVirtReg(unsigned VirtReg); + void killVirtReg(Register VirtReg); void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); - void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); + void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg); void usePhysReg(MachineOperand &MO); void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, @@ -199,34 +200,34 @@ namespace { unsigned calcSpillCost(MCPhysReg PhysReg) const; void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); - LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { + LiveRegMap::iterator findLiveVirtReg(Register VirtReg) { return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } - LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const { + LiveRegMap::const_iterator findLiveVirtReg(Register VirtReg) const { return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } - void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint); + void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint); void allocVirtRegUndef(MachineOperand &MO); - MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, - unsigned Hint); - LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg, - unsigned Hint); + MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, + Register Hint); + LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, + Register Hint); void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut); bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); - unsigned traceCopies(unsigned VirtReg) const; - unsigned traceCopyChain(unsigned Reg) const; + Register traceCopies(Register VirtReg) const; + Register traceCopyChain(Register Reg) const; - int getStackSpaceFor(unsigned VirtReg); - void spill(MachineBasicBlock::iterator Before, unsigned VirtReg, + int getStackSpaceFor(Register VirtReg); + void spill(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg AssignedReg, bool Kill); - void reload(MachineBasicBlock::iterator Before, unsigned VirtReg, + void reload(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg PhysReg); - bool mayLiveOut(unsigned VirtReg); - bool mayLiveIn(unsigned VirtReg); + bool mayLiveOut(Register VirtReg); + bool mayLiveIn(Register VirtReg); void dumpState(); }; @@ -244,7 +245,7 @@ void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { /// This allocates space for the specified virtual register to be held on the /// stack. -int RegAllocFast::getStackSpaceFor(unsigned VirtReg) { +int RegAllocFast::getStackSpaceFor(Register VirtReg) { // Find the location Reg would belong... int SS = StackSlotForVirtReg[VirtReg]; // Already has space allocated? @@ -263,7 +264,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) { } /// Returns false if \p VirtReg is known to not live out of the current block. 
-bool RegAllocFast::mayLiveOut(unsigned VirtReg) { +bool RegAllocFast::mayLiveOut(Register VirtReg) { if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) { // Cannot be live-out if there are no successors. return !MBB->succ_empty(); @@ -292,7 +293,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) { } /// Returns false if \p VirtReg is known to not be live into the current block. -bool RegAllocFast::mayLiveIn(unsigned VirtReg) { +bool RegAllocFast::mayLiveIn(Register VirtReg) { if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) return !MBB->pred_empty(); @@ -311,7 +312,7 @@ bool RegAllocFast::mayLiveIn(unsigned VirtReg) { /// Insert spill instruction for \p AssignedReg before \p Before. Update /// DBG_VALUEs with \p VirtReg operands with the stack slot. -void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg, +void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg AssignedReg, bool Kill) { LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " in " << printReg(AssignedReg, TRI)); @@ -339,7 +340,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg, } /// Insert reload instruction for \p PhysReg before \p Before. -void RegAllocFast::reload(MachineBasicBlock::iterator Before, unsigned VirtReg, +void RegAllocFast::reload(MachineBasicBlock::iterator Before, Register VirtReg, MCPhysReg PhysReg) { LLVM_DEBUG(dbgs() << "Reloading " << printReg(VirtReg, TRI) << " into " << printReg(PhysReg, TRI) << '\n'); @@ -393,7 +394,7 @@ void RegAllocFast::killVirtReg(LiveReg &LR) { } /// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(unsigned VirtReg) { +void RegAllocFast::killVirtReg(Register VirtReg) { assert(Register::isVirtualRegister(VirtReg) && "killVirtReg needs a virtual register"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); @@ -404,7 +405,7 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) { /// This method spills the value specified by VirtReg into the corresponding /// stack slot if needed. 
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, - unsigned VirtReg) { + Register VirtReg) { assert(Register::isVirtualRegister(VirtReg) && "Spilling a physical register is illegal!"); LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); @@ -456,7 +457,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { return; Register PhysReg = MO.getReg(); - assert(Register::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand"); + assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { @@ -520,7 +521,7 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) { void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, RegState NewState) { markRegUsedInInstr(PhysReg); - switch (unsigned VirtReg = PhysRegState[PhysReg]) { + switch (Register VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; default: @@ -536,7 +537,7 @@ void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, setPhysRegState(PhysReg, NewState); for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { MCPhysReg Alias = *AI; - switch (unsigned VirtReg = PhysRegState[Alias]) { + switch (Register VirtReg = PhysRegState[Alias]) { case regDisabled: break; default: @@ -562,7 +563,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { << " is already used in instr.\n"); return spillImpossible; } - switch (unsigned VirtReg = PhysRegState[PhysReg]) { + switch (Register VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; case regFree: @@ -584,7 +585,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { MCPhysReg Alias = *AI; - switch (unsigned VirtReg = PhysRegState[Alias]) { + switch (Register VirtReg = PhysRegState[Alias]) { case regDisabled: break; case regFree: @@ -608,7 +609,7 @@ unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { /// proper container for VirtReg now. The physical register must not be used /// for anything else when this is called. void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { - unsigned VirtReg = LR.VirtReg; + Register VirtReg = LR.VirtReg; LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to " << printReg(PhysReg, TRI) << '\n'); assert(LR.PhysReg == 0 && "Already assigned a physreg"); @@ -621,13 +622,13 @@ static bool isCoalescable(const MachineInstr &MI) { return MI.isFullCopy(); } -unsigned RegAllocFast::traceCopyChain(unsigned Reg) const { +Register RegAllocFast::traceCopyChain(Register Reg) const { static const unsigned ChainLengthLimit = 3; unsigned C = 0; do { - if (Register::isPhysicalRegister(Reg)) + if (Reg.isPhysical()) return Reg; - assert(Register::isVirtualRegister(Reg)); + assert(Reg.isVirtual()); MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg); if (!VRegDef || !isCoalescable(*VRegDef)) @@ -640,26 +641,26 @@ unsigned RegAllocFast::traceCopyChain(unsigned Reg) const { /// Check if any of \p VirtReg's definitions is a copy. If it is follow the /// chain of copies to check whether we reach a physical register we can /// coalesce with. 
-unsigned RegAllocFast::traceCopies(unsigned VirtReg) const { +Register RegAllocFast::traceCopies(Register VirtReg) const { static const unsigned DefLimit = 3; unsigned C = 0; for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) { if (isCoalescable(MI)) { Register Reg = MI.getOperand(1).getReg(); Reg = traceCopyChain(Reg); - if (Reg != 0) + if (Reg.isValid()) return Reg; } if (++C >= DefLimit) break; } - return 0; + return Register(); } /// Allocates a physical register for VirtReg. -void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { - const unsigned VirtReg = LR.VirtReg; +void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { + const Register VirtReg = LR.VirtReg; assert(Register::isVirtualRegister(VirtReg) && "Can only allocate virtual registers"); @@ -670,7 +671,7 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { << " with hint " << printReg(Hint0, TRI) << '\n'); // Take hint when possible. - if (Register::isPhysicalRegister(Hint0) && MRI->isAllocatable(Hint0) && + if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0)) { // Ignore the hint if we would have to spill a dirty register. unsigned Cost = calcSpillCost(Hint0); @@ -686,12 +687,12 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { << "occupied\n"); } } else { - Hint0 = 0; + Hint0 = Register(); } // Try other hint. - unsigned Hint1 = traceCopies(VirtReg); - if (Register::isPhysicalRegister(Hint1) && MRI->isAllocatable(Hint1) && + Register Hint1 = traceCopies(VirtReg); + if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) { // Ignore the hint if we would have to spill a dirty register. unsigned Cost = calcSpillCost(Hint1); @@ -707,7 +708,7 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) { << "occupied\n"); } } else { - Hint1 = 0; + Hint1 = Register(); } MCPhysReg BestReg = 0; @@ -775,14 +776,14 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { /// Allocates a register for VirtReg and mark it as dirty. MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, - unsigned VirtReg, unsigned Hint) { + Register VirtReg, Register Hint) { assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); if (!LRI->PhysReg) { // If there is no hint, peek at the only use of this register. - if ((!Hint || !Register::isPhysicalRegister(Hint)) && + if ((!Hint || !Hint.isPhysical()) && MRI->hasOneNonDBGUse(VirtReg)) { const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); // It's a copy, use the destination register as a hint. @@ -807,8 +808,8 @@ MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, /// Make sure VirtReg is available in a physreg and return it. RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI, unsigned OpNum, - unsigned VirtReg, - unsigned Hint) { + Register VirtReg, + Register Hint) { assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); LiveRegMap::iterator LRI; bool New; @@ -884,13 +885,13 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, // Handles special instruction operand like early clobbers and tied ops when // there are additional physreg defines. 
void RegAllocFast::handleThroughOperands(MachineInstr &MI, - SmallVectorImpl<unsigned> &VirtDead) { + SmallVectorImpl<Register> &VirtDead) { LLVM_DEBUG(dbgs() << "Scanning for through registers:"); - SmallSet<unsigned, 8> ThroughRegs; + SmallSet<Register, 8> ThroughRegs; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { @@ -905,7 +906,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; Register Reg = MO.getReg(); - if (!Reg || !Register::isPhysicalRegister(Reg)) + if (!Reg || !Reg.isPhysical()) continue; markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { @@ -914,7 +915,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, } } - SmallVector<unsigned, 8> PartialDefs; + SmallVector<Register, 8> PartialDefs; LLVM_DEBUG(dbgs() << "Allocating tied uses.\n"); for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { MachineOperand &MO = MI.getOperand(I); @@ -961,7 +962,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; Register Reg = MO.getReg(); - if (!Reg || !Register::isPhysicalRegister(Reg)) + if (!Reg || !Reg.isPhysical()) continue; LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) << " as used in instr\n"); @@ -969,7 +970,7 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI, } // Also mark PartialDefs as used to avoid reallocation. - for (unsigned PartialDef : PartialDefs) + for (Register PartialDef : PartialDefs) markRegUsedInInstr(PartialDef); } @@ -1002,7 +1003,7 @@ void RegAllocFast::dumpState() { e = LiveVirtRegs.end(); i != e; ++i) { if (!i->PhysReg) continue; - assert(Register::isVirtualRegister(i->VirtReg) && "Bad map key"); + assert(i->VirtReg.isVirtual() && "Bad map key"); assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); } @@ -1013,8 +1014,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { const MCInstrDesc &MCID = MI.getDesc(); // If this is a copy, we may be able to coalesce. - unsigned CopySrcReg = 0; - unsigned CopyDstReg = 0; + Register CopySrcReg; + Register CopyDstReg; unsigned CopySrcSub = 0; unsigned CopyDstSub = 0; if (MI.isCopy()) { @@ -1082,7 +1083,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { handleThroughOperands(MI, VirtDead); // Don't attempt coalescing when we have funny stuff going on. - CopyDstReg = 0; + CopyDstReg = Register(); // Pretend we have early clobbers so the use operands get marked below. // This is not necessary for the common case of a single tied use. 
hasEarlyClobbers = true; @@ -1095,7 +1096,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { MachineOperand &MO = MI.getOperand(I); if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; if (MO.isUse()) { if (MO.isUndef()) { @@ -1124,7 +1125,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) + if (!Reg.isVirtual()) continue; assert(MO.isUndef() && "Should only have undef virtreg uses left"); @@ -1139,7 +1140,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; Register Reg = MO.getReg(); - if (!Reg || !Register::isPhysicalRegister(Reg)) + if (!Reg || !Reg.isPhysical()) continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MO.isTied()) continue; @@ -1168,7 +1169,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { continue; Register Reg = MO.getReg(); - if (!Reg || !Register::isPhysicalRegister(Reg) || !MRI->isAllocatable(Reg)) + if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg)) continue; definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); } @@ -1182,12 +1183,12 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { Register Reg = MO.getReg(); // We have already dealt with phys regs in the previous scan. - if (Register::isPhysicalRegister(Reg)) + if (Reg.isPhysical()) continue; MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg); if (setPhysReg(MI, MI.getOperand(I), PhysReg)) { VirtDead.push_back(Reg); - CopyDstReg = 0; // cancel coalescing; + CopyDstReg = Register(); // cancel coalescing; } else CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0; } @@ -1196,7 +1197,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) { // register are allocated identically. We didn't need to do this for uses // because we are crerating our own kill flags, and they are always at the // last use. - for (unsigned VirtReg : VirtDead) + for (Register VirtReg : VirtDead) killVirtReg(VirtReg); VirtDead.clear(); @@ -1234,7 +1235,7 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) { // We can't allocate a physreg for a DebugValue, sorry! LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(0); + MO.setReg(Register()); } // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so @@ -1252,7 +1253,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { MachineBasicBlock::iterator MII = MBB.begin(); // Add live-in registers as live. - for (const MachineBasicBlock::RegisterMaskPair LI : MBB.liveins()) + for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins()) if (MRI->isAllocatable(LI.PhysReg)) definePhysReg(MII, LI.PhysReg, regReserved); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index d27db678f02a..27de7fe45887 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -3126,6 +3126,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, spiller().spill(LRE); setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); + // Tell LiveDebugVariables about the new ranges. Ranges not being covered by + // the new regs are kept in LDV (still mapping to the old register), until + // we rewrite spilled locations in LDV at a later stage. 
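Most of the RegAllocFast churn above is a mechanical migration from raw unsigned register numbers to the Register value type; the payoff is that intent is visible in signatures and the virtual/physical checks become methods instead of free functions. A toy wrapper showing the shape of that API, under the assumption (LLVM's usual convention, restated here from memory) that virtual register numbers are marked by the most significant bit and 0 means "no register":

  #include <cassert>
  #include <cstdint>

  // Illustrative stand-in for llvm::Register, not the real class.
  class Reg {
    uint32_t R = 0;
  public:
    static constexpr uint32_t VirtBit = 1u << 31; // assumption: MSB marks vregs
    Reg() = default;
    explicit Reg(uint32_t V) : R(V) {}
    bool isValid() const { return R != 0; }
    bool isVirtual() const { return (R & VirtBit) != 0; }
    bool isPhysical() const { return isValid() && !isVirtual(); }
    uint32_t id() const { return R; }
  };

  int main() {
    Reg None, Phys(5), Virt(Reg::VirtBit | 7);
    assert(!None.isValid());
    assert(Phys.isPhysical() && !Phys.isVirtual());
    assert(Virt.isVirtual());
    return 0;
  }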
+ DebugVars->splitRegister(VirtReg.reg, LRE.regs(), *LIS); + if (VerifyEnabled) MF->verify(this, "After spilling"); } @@ -3220,8 +3225,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { MF->getSubtarget().enableRALocalReassignment( MF->getTarget().getOptLevel()); - EnableAdvancedRASplitCost = ConsiderLocalIntervalCost || - MF->getSubtarget().enableAdvancedRASplitCost(); + EnableAdvancedRASplitCost = + ConsiderLocalIntervalCost.getNumOccurrences() + ? ConsiderLocalIntervalCost + : MF->getSubtarget().enableAdvancedRASplitCost(); if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp index 757ff0e44953..5a79ac44dcf4 100644 --- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp +++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp @@ -56,7 +56,7 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; - // Call determineCalleeSaves and then also set the bits for subregs and + // Call getCalleeSaves and then also set the bits for subregs and // fully saved superregs. static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF); @@ -199,7 +199,7 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) { // Target will return the set of registers that it saves/restores as needed. SavedRegs.clear(); - TFI.determineCalleeSaves(MF, SavedRegs); + TFI.getCalleeSaves(MF, SavedRegs); if (SavedRegs.none()) return; diff --git a/llvm/lib/CodeGen/RegisterClassInfo.cpp b/llvm/lib/CodeGen/RegisterClassInfo.cpp index 530e0cccf1d4..1523bd4d1649 100644 --- a/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -59,7 +59,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { if (Update || CSR != CalleeSavedRegs) { // Build a CSRAlias map. Every CSR alias saves the last // overlapping CSR. 
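The RegAllocGreedy change above (EnableAdvancedRASplitCost) swaps an OR of the flag with the subtarget default for a getNumOccurrences() test, so an explicitly passed option can force the behaviour off as well as on. A small sketch of that cl::opt pattern, assuming the LLVM Support command-line library; the option name and the subtarget stand-in below are illustrative, not from this commit:

  #include "llvm/Support/CommandLine.h"
  using namespace llvm;

  // Hypothetical flag; the silent default is "off".
  static cl::opt<bool> EnableFeature("enable-feature",
                                     cl::desc("Force the feature on or off"),
                                     cl::init(false));

  static bool subtargetDefault() { return true; } // stand-in for the subtarget hook

  int main(int argc, char **argv) {
    cl::ParseCommandLineOptions(argc, argv);
    // `EnableFeature || subtargetDefault()` could never force the feature off.
    // Consulting getNumOccurrences() lets -enable-feature=0 override the
    // subtarget default in both directions, while silence keeps the default.
    bool Enable = EnableFeature.getNumOccurrences() ? EnableFeature
                                                    : subtargetDefault();
    return Enable ? 0 : 1;
  }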
- CalleeSavedAliases.resize(TRI->getNumRegs(), 0); + CalleeSavedAliases.assign(TRI->getNumRegs(), 0); for (const MCPhysReg *I = CSR; *I; ++I) for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) CalleeSavedAliases[*AI] = *I; @@ -186,6 +186,7 @@ unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { NumRCUnits = NUnits; } } + assert(RC && "Failed to find register class"); compute(RC); unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); return TRI->getRegPressureSetLimit(*MF, Idx) - diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index 6ff5ddbc023d..a3f75d82d0ec 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -119,32 +120,46 @@ static cl::opt<unsigned> LargeIntervalFreqThreshold( namespace { + class JoinVals; + class RegisterCoalescer : public MachineFunctionPass, private LiveRangeEdit::Delegate { - MachineFunction* MF; - MachineRegisterInfo* MRI; - const TargetRegisterInfo* TRI; - const TargetInstrInfo* TII; - LiveIntervals *LIS; - const MachineLoopInfo* Loops; - AliasAnalysis *AA; + MachineFunction* MF = nullptr; + MachineRegisterInfo* MRI = nullptr; + const TargetRegisterInfo* TRI = nullptr; + const TargetInstrInfo* TII = nullptr; + LiveIntervals *LIS = nullptr; + const MachineLoopInfo* Loops = nullptr; + AliasAnalysis *AA = nullptr; RegisterClassInfo RegClassInfo; + /// Debug variable location tracking -- for each VReg, maintain an + /// ordered-by-slot-index set of DBG_VALUEs, to help quick + /// identification of whether coalescing may change location validity. + using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>; + DenseMap<unsigned, std::vector<DbgValueLoc>> DbgVRegToValues; + + /// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached. + /// To avoid repeatedly merging sets of DbgValueLocs, instead record + /// which vregs have been coalesced, and where to. This map is from + /// vreg => {set of vregs merged in}. + DenseMap<unsigned, SmallVector<unsigned, 4>> DbgMergedVRegNums; + /// A LaneMask to remember on which subregister live ranges we need to call /// shrinkToUses() later. LaneBitmask ShrinkMask; /// True if the main range of the currently coalesced intervals should be /// checked for smaller live intervals. - bool ShrinkMainRange; + bool ShrinkMainRange = false; /// True if the coalescer should aggressively coalesce global copies /// in favor of keeping local copies. - bool JoinGlobalCopies; + bool JoinGlobalCopies = false; /// True if the coalescer should aggressively coalesce fall-thru /// blocks exclusively containing copies. - bool JoinSplitEdges; + bool JoinSplitEdges = false; /// Copy instructions yet to be coalesced. SmallVector<MachineInstr*, 8> WorkList; @@ -225,7 +240,8 @@ namespace { /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange /// lanemasks already adjusted to the coalesced register. void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, - LaneBitmask LaneMask, CoalescerPair &CP); + LaneBitmask LaneMask, CoalescerPair &CP, + unsigned DstIdx); /// Join the liveranges of two subregisters. Joins @p RRange into /// @p LRange, @p RRange may be invalid afterwards. 
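The RegisterClassInfo fix above (resize -> assign) matters because CalleeSavedAliases is reused across functions: std::vector::resize only value-initialises elements it adds, so when the vector already has the right size nothing is cleared and aliases from the previous CSR set leak through, whereas assign overwrites every element. A two-assert illustration with plain std::vector (values are arbitrary):

  #include <cassert>
  #include <vector>

  int main() {
    std::vector<unsigned> V = {7, 7, 7}; // stale data from a previous run
    V.resize(3, 0);                      // size unchanged: nothing is touched
    assert(V[0] == 7);
    V.assign(3, 0);                      // every element rewritten
    assert(V[0] == 0 && V[1] == 0 && V[2] == 0);
    return 0;
  }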
@@ -325,6 +341,19 @@ namespace { MI->eraseFromParent(); } + /// Walk over function and initialize the DbgVRegToValues map. + void buildVRegToDbgValueMap(MachineFunction &MF); + + /// Test whether, after merging, any DBG_VALUEs would refer to a + /// different value number than before merging, and whether this can + /// be resolved. If not, mark the DBG_VALUE as being undef. + void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS, + JoinVals &LHSVals, LiveRange &RHS, + JoinVals &RHSVals); + + void checkMergingChangesDbgValuesImpl(unsigned Reg, LiveRange &OtherRange, + LiveRange &RegRange, JoinVals &Vals2); + public: static char ID; ///< Class identification, replacement for typeinfo @@ -1648,8 +1677,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } } -void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, - unsigned DstReg, +void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { bool DstIsPhys = Register::isPhysicalRegister(DstReg); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); @@ -1705,8 +1733,15 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); - DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); + LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask UnusedLanes = FullMask & ~UsedLanes; + DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt); + // The unused lanes are just empty live-ranges at this point. + // It is the caller responsibility to set the proper + // dead segments if there is an actual dead def of the + // unused lanes. This may happen with rematerialization. + DstInt->createSubRange(Allocator, UnusedLanes); } SlotIndex MIIdx = UseMI->isDebugValue() ? LIS->getSlotIndexes()->getIndexBefore(*UseMI) @@ -2195,6 +2230,7 @@ class JoinVals { /// NewVNInfo. This is suitable for passing to LiveInterval::join(). SmallVector<int, 8> Assignments; + public: /// Conflict resolution for overlapping values. enum ConflictResolution { /// No overlap, simply keep this value. @@ -2223,6 +2259,7 @@ class JoinVals { CR_Impossible }; + private: /// Per-value info for LI. The lane bit masks are all relative to the final /// joined register, so they can be compared directly between SrcReg and /// DstReg. @@ -2383,6 +2420,11 @@ public: /// Get the value assignments suitable for passing to LiveInterval::join. const int *getAssignments() const { return Assignments.data(); } + + /// Get the conflict resolution for a value number. + ConflictResolution getResolution(unsigned Num) const { + return Vals[Num].Resolution; + } }; } // end anonymous namespace @@ -3115,7 +3157,8 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, LiveInterval *LI) { for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { // Get the def location before markUnused() below invalidates it. 
- SlotIndex Def = LR.getValNumInfo(i)->def; + VNInfo *VNI = LR.getValNumInfo(i); + SlotIndex Def = VNI->def; switch (Vals[i].Resolution) { case CR_Keep: { // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any @@ -3131,8 +3174,6 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, // In such cases, removing this def from the main range must be // complemented by extending the main range to account for the liveness // of the other subrange. - VNInfo *VNI = LR.getValNumInfo(i); - SlotIndex Def = VNI->def; // The new end point of the main range segment to be extended. SlotIndex NewEnd; if (LI != nullptr) { @@ -3272,7 +3313,8 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, LaneBitmask LaneMask, - CoalescerPair &CP) { + CoalescerPair &CP, + unsigned ComposeSubRegIdx) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); LI.refineSubRanges( Allocator, LaneMask, @@ -3285,7 +3327,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); } }, - *LIS->getSlotIndexes(), *TRI); + *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx); } bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) { @@ -3351,12 +3393,12 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { if (!RHS.hasSubRanges()) { LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() : TRI->getSubRegIndexLaneMask(SrcIdx); - mergeSubRangeInto(LHS, RHS, Mask, CP); + mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx); } else { // Pair up subranges and merge. for (LiveInterval::SubRange &R : RHS.subranges()) { LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); - mergeSubRangeInto(LHS, R, Mask, CP); + mergeSubRangeInto(LHS, R, Mask, CP, DstIdx); } } LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); @@ -3385,6 +3427,9 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { while (!ShrinkRegs.empty()) shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); + // Scan and mark undef any DBG_VALUEs that would refer to a different value. + checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals); + // Join RHS into LHS. LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); @@ -3416,6 +3461,140 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); } +void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF) +{ + const SlotIndexes &Slots = *LIS->getSlotIndexes(); + SmallVector<MachineInstr *, 8> ToInsert; + + // After collecting a block of DBG_VALUEs into ToInsert, enter them into the + // vreg => DbgValueLoc map. + auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) { + for (auto *X : ToInsert) + DbgVRegToValues[X->getOperand(0).getReg()].push_back({Slot, X}); + + ToInsert.clear(); + }; + + // Iterate over all instructions, collecting them into the ToInsert vector. + // Once a non-debug instruction is found, record the slot index of the + // collected DBG_VALUEs. + for (auto &MBB : MF) { + SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB); + + for (auto &MI : MBB) { + if (MI.isDebugValue() && MI.getOperand(0).isReg() && + MI.getOperand(0).getReg().isVirtual()) { + ToInsert.push_back(&MI); + } else if (!MI.isDebugInstr()) { + CurrentSlot = Slots.getInstructionIndex(MI); + CloseNewDVRange(CurrentSlot); + } + } + + // Close range of DBG_VALUEs at the end of blocks. 
+ CloseNewDVRange(Slots.getMBBEndIdx(&MBB)); + } + + // Sort all DBG_VALUEs we've seen by slot number. + for (auto &Pair : DbgVRegToValues) + llvm::sort(Pair.second); +} + +void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP, + LiveRange &LHS, + JoinVals &LHSVals, + LiveRange &RHS, + JoinVals &RHSVals) { + auto ScanForDstReg = [&](unsigned Reg) { + checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals); + }; + + auto ScanForSrcReg = [&](unsigned Reg) { + checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals); + }; + + // Scan for potentially unsound DBG_VALUEs: examine first the register number + // Reg, and then any other vregs that may have been merged into it. + auto PerformScan = [this](unsigned Reg, std::function<void(unsigned)> Func) { + Func(Reg); + if (DbgMergedVRegNums.count(Reg)) + for (unsigned X : DbgMergedVRegNums[Reg]) + Func(X); + }; + + // Scan for unsound updates of both the source and destination register. + PerformScan(CP.getSrcReg(), ScanForSrcReg); + PerformScan(CP.getDstReg(), ScanForDstReg); +} + +void RegisterCoalescer::checkMergingChangesDbgValuesImpl(unsigned Reg, + LiveRange &OtherLR, + LiveRange &RegLR, + JoinVals &RegVals) { + // Are there any DBG_VALUEs to examine? + auto VRegMapIt = DbgVRegToValues.find(Reg); + if (VRegMapIt == DbgVRegToValues.end()) + return; + + auto &DbgValueSet = VRegMapIt->second; + auto DbgValueSetIt = DbgValueSet.begin(); + auto SegmentIt = OtherLR.begin(); + + bool LastUndefResult = false; + SlotIndex LastUndefIdx; + + // If the "Other" register is live at a slot Idx, test whether Reg can + // safely be merged with it, or should be marked undef. + auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult, + &LastUndefIdx](SlotIndex Idx) -> bool { + // Our worst-case performance typically happens with asan, causing very + // many DBG_VALUEs of the same location. Cache a copy of the most recent + // result for this edge-case. + if (LastUndefIdx == Idx) + return LastUndefResult; + + // If the other range was live, and Reg's was not, the register coalescer + // will not have tried to resolve any conflicts. We don't know whether + // the DBG_VALUE will refer to the same value number, so it must be made + // undef. + auto OtherIt = RegLR.find(Idx); + if (OtherIt == RegLR.end()) + return true; + + // Both the registers were live: examine the conflict resolution record for + // the value number Reg refers to. CR_Keep meant that this value number + // "won" and the merged register definitely refers to that value. CR_Erase + // means the value number was a redundant copy of the other value, which + // was coalesced and Reg deleted. It's safe to refer to the other register + // (which will be the source of the copy). + auto Resolution = RegVals.getResolution(OtherIt->valno->id); + LastUndefResult = Resolution != JoinVals::CR_Keep && + Resolution != JoinVals::CR_Erase; + LastUndefIdx = Idx; + return LastUndefResult; + }; + + // Iterate over both the live-range of the "Other" register, and the set of + // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest + // slot index. This relies on the DbgValueSet being ordered. + while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) { + if (DbgValueSetIt->first < SegmentIt->end) { + // "Other" is live and there is a DBG_VALUE of Reg: test if we should + // set it undef. 
+ if (DbgValueSetIt->first >= SegmentIt->start && + DbgValueSetIt->second->getOperand(0).getReg() != 0 && + ShouldUndef(DbgValueSetIt->first)) { + // Mark undef, erase record of this DBG_VALUE to avoid revisiting. + DbgValueSetIt->second->getOperand(0).setReg(0); + continue; + } + ++DbgValueSetIt; + } else { + ++SegmentIt; + } + } +} + namespace { /// Information concerning MBB coalescing priority. @@ -3698,6 +3877,10 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { if (VerifyCoalescing) MF->verify(this, "Before register coalescing"); + DbgVRegToValues.clear(); + DbgMergedVRegNums.clear(); + buildVRegToDbgValueMap(fn); + RegClassInfo.runOnMachineFunction(fn); // Join (coalesce) intervals if requested. diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp index ec0868acab38..a5bea1463468 100644 --- a/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" @@ -220,8 +221,8 @@ void RegScavenger::forward() { // Ideally we would like a way to model this, but leaving the // insert_subreg around causes both correctness and performance issues. bool SubUsed = false; - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - if (isRegUsed(*SubRegs)) { + for (const MCPhysReg &SubReg : TRI->subregs(Reg)) + if (isRegUsed(SubReg)) { SubUsed = true; break; } diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index e3f5abb6301f..4ee28d6bbb46 100644 --- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp index 019de6554d2a..0f73973c8a51 100644 --- a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp +++ b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp @@ -17,9 +17,10 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp index ddbbd0f8d6e9..8aa488e63913 100644 --- a/llvm/lib/CodeGen/SafeStack.cpp +++ b/llvm/lib/CodeGen/SafeStack.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -54,6 +53,7 @@ #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" @@ -63,6 +63,7 @@ #include "llvm/Target/TargetMachine.h" 
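The loop that ends above (checkMergingChangesDbgValuesImpl) is a classic merge-style sweep: the DBG_VALUE slot indices and the live-range segments are both sorted, so the code walks the two sequences together and only inspects a debug value when it falls inside the current segment, giving O(n + m) work overall. A generic sketch of that sweep, with plain integers standing in for SlotIndex and half-open [start, end) pairs standing in for segments (all names are illustrative):

  #include <cassert>
  #include <utility>
  #include <vector>

  // Visit every point that lies inside some interval. Both inputs are sorted;
  // whichever sequence is behind gets advanced, exactly as in the loop above.
  template <typename Visit>
  void sweep(const std::vector<int> &Points,
             const std::vector<std::pair<int, int>> &Intervals, Visit V) {
    auto P = Points.begin();
    auto I = Intervals.begin();
    while (P != Points.end() && I != Intervals.end()) {
      if (*P < I->second) {    // point is before the interval's end...
        if (*P >= I->first)    // ...and at or after its start: a hit
          V(*P);
        ++P;
      } else {
        ++I;                   // interval lies entirely behind the point
      }
    }
  }

  int main() {
    std::vector<int> Hits;
    sweep({1, 4, 6, 9}, {{3, 5}, {8, 10}}, [&](int P) { Hits.push_back(P); });
    assert((Hits == std::vector<int>{4, 9}));
    return 0;
  }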
#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -562,7 +563,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( for (Argument *Arg : ByValArguments) { unsigned Offset = SSL.getObjectOffset(Arg); - unsigned Align = SSL.getObjectAlignment(Arg); + MaybeAlign Align(SSL.getObjectAlignment(Arg)); Type *Ty = Arg->getType()->getPointerElementType(); uint64_t Size = DL.getTypeStoreSize(Ty); @@ -579,7 +580,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack( DIExpression::ApplyOffset, -Offset); Arg->replaceAllUsesWith(NewArg); IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode()); - IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size); + IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlign(), Size); } // Allocate space for every unsafe static AllocaInst on the unsafe stack. diff --git a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index b4037499d7d1..ee72de67d875 100644 --- a/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include <algorithm> @@ -848,21 +849,20 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); if (II) { + unsigned Alignment; switch (II->getIntrinsicID()) { default: break; case Intrinsic::masked_load: { // Scalarize unsupported vector masked load - unsigned Alignment = - cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment))) return false; scalarizeMaskedLoad(CI, ModifiedDT); return true; } case Intrinsic::masked_store: { - unsigned Alignment = - cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); + Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), MaybeAlign(Alignment))) return false; @@ -870,12 +870,15 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI, return true; } case Intrinsic::masked_gather: - if (TTI->isLegalMaskedGather(CI->getType())) + Alignment = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); + if (TTI->isLegalMaskedGather(CI->getType(), MaybeAlign(Alignment))) return false; scalarizeMaskedGather(CI, ModifiedDT); return true; case Intrinsic::masked_scatter: - if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) + Alignment = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); + if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType(), + MaybeAlign(Alignment))) return false; scalarizeMaskedScatter(CI, ModifiedDT); return true; diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 96a1f86c3e04..d11406cc330f 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -270,8 +270,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, UseOp)); ST.adjustSchedDependency(SU, UseSU, Dep); - } else + } else { Dep.setLatency(0); + // FIXME: We could always let target to 
adjustSchedDependency(), and + // remove this condition, but that currently asserts in Hexagon BE. + if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle())) + ST.adjustSchedDependency(SU, UseSU, Dep); + } UseSU->addPred(Dep); } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e8950b58d42d..e5bc08b9280a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -131,6 +131,7 @@ namespace { const TargetLowering &TLI; CombineLevel Level; CodeGenOpt::Level OptLevel; + bool LegalDAG = false; bool LegalOperations = false; bool LegalTypes = false; bool ForCodeSize; @@ -179,6 +180,12 @@ namespace { AddToWorklist(Node); } + /// Convenient shorthand to add a node and all of its user to the worklist. + void AddToWorklistWithUsers(SDNode *N) { + AddUsersToWorklist(N); + AddToWorklist(N); + } + // Prune potentially dangling nodes. This is called after // any visit to a node, but should also be called during a visit after any // failed combine which may have created a DAG node. @@ -217,14 +224,16 @@ namespace { DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) { - ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); + ForCodeSize = DAG.shouldOptForSize(); MaximumLegalStoreInBits = 0; + // We use the minimum store size here, since that's all we can guarantee + // for the scalable vector types. for (MVT VT : MVT::all_valuetypes()) if (EVT(VT).isSimple() && VT != MVT::Other && TLI.isTypeLegal(EVT(VT)) && - VT.getSizeInBits() >= MaximumLegalStoreInBits) - MaximumLegalStoreInBits = VT.getSizeInBits(); + VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits) + MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize(); } void ConsiderForPruning(SDNode *N) { @@ -622,7 +631,7 @@ namespace { ConstantSDNode *Mask, SDNode *&NodeToMask); /// Attempt to propagate a given AND node back to load leaves so that they /// can be combined into narrow loads. - bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); + bool BackwardsPropagateMask(SDNode *N); /// Helper function for MergeConsecutiveStores which merges the /// component store chains. @@ -1026,8 +1035,7 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. - AddToWorklist(TLO.New.getNode()); - AddUsersToWorklist(TLO.New.getNode()); + AddToWorklistWithUsers(TLO.New.getNode()); // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to @@ -1393,6 +1401,7 @@ bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. Level = AtLevel; + LegalDAG = Level >= AfterLegalizeDAG; LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; @@ -1419,14 +1428,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // If this combine is running after legalizing the DAG, re-legalize any // nodes pulled off the worklist. 
- if (Level == AfterLegalizeDAG) { + if (LegalDAG) { SmallSetVector<SDNode *, 16> UpdatedNodes; bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); - for (SDNode *LN : UpdatedNodes) { - AddUsersToWorklist(LN); - AddToWorklist(LN); - } + for (SDNode *LN : UpdatedNodes) + AddToWorklistWithUsers(LN); + if (!NIsValid) continue; } @@ -2800,6 +2808,96 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, return SDValue(); } +// If we are facing some sort of diamond carry/borrow in/out pattern try to +// match patterns like: +// +// (uaddo A, B) CarryIn +// | \ | +// | \ | +// PartialSum PartialCarryOutX / +// | | / +// | ____|____________/ +// | / | +// (uaddo *, *) \________ +// | \ \ +// | \ | +// | PartialCarryOutY | +// | \ | +// | \ / +// AddCarrySum | ______/ +// | / +// CarryOut = (or *, *) +// +// And generate ADDCARRY (or SUBCARRY) with two result values: +// +// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn) +// +// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with +// a single path for carry/borrow out propagation: +static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG, + const TargetLowering &TLI, SDValue Carry0, + SDValue Carry1, SDNode *N) { + if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1) + return SDValue(); + unsigned Opcode = Carry0.getOpcode(); + if (Opcode != Carry1.getOpcode()) + return SDValue(); + if (Opcode != ISD::UADDO && Opcode != ISD::USUBO) + return SDValue(); + + // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the + // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in + // the above ASCII art.) + if (Carry1.getOperand(0) != Carry0.getValue(0) && + Carry1.getOperand(1) != Carry0.getValue(0)) + std::swap(Carry0, Carry1); + if (Carry1.getOperand(0) != Carry0.getValue(0) && + Carry1.getOperand(1) != Carry0.getValue(0)) + return SDValue(); + + // The carry in value must be on the righthand side for subtraction. + unsigned CarryInOperandNum = + Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0; + if (Opcode == ISD::USUBO && CarryInOperandNum != 1) + return SDValue(); + SDValue CarryIn = Carry1.getOperand(CarryInOperandNum); + + unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY; + if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType())) + return SDValue(); + + // Verify that the carry/borrow in is plausibly a carry/borrow bit. + // TODO: make getAsCarry() aware of how partial carries are merged. + if (CarryIn.getOpcode() != ISD::ZERO_EXTEND) + return SDValue(); + CarryIn = CarryIn.getOperand(0); + if (CarryIn.getValueType() != MVT::i1) + return SDValue(); + + SDLoc DL(N); + SDValue Merged = + DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0), + Carry0.getOperand(1), CarryIn); + + // Please note that because we have proven that the result of the UADDO/USUBO + // of A and B feeds into the UADDO/USUBO that does the carry/borrow in, we can + // therefore prove that if the first UADDO/USUBO overflows, the second + // UADDO/USUBO cannot. For example consider 8-bit numbers where 0xFF is the + // maximum value. + // + // 0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry + // 0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow) + // + // This is important because it means that OR and XOR can be used to merge + // carry flags; and that AND can return a constant zero. 
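The comment block that closes above makes one load-bearing claim: because the first UADDO's result feeds the second one, the two partial additions can never carry at the same time, so OR (or XOR) of the two carry bits is the carry-out of the whole addcarry and AND of them folds to zero. That claim is easy to confirm exhaustively for 8-bit values; the check below is plain arithmetic, with no SelectionDAG involved:

  #include <cassert>

  int main() {
    for (unsigned A = 0; A <= 0xFF; ++A) {
      for (unsigned B = 0; B <= 0xFF; ++B) {
        for (unsigned CarryIn = 0; CarryIn <= 1; ++CarryIn) {
          unsigned PartialSum = (A + B) & 0xFF;
          bool CarryOutX = (A + B) > 0xFF;                // first uaddo
          bool CarryOutY = (PartialSum + CarryIn) > 0xFF; // second uaddo
          assert(!(CarryOutX && CarryOutY));              // never both set
          unsigned AddCarrySum = (PartialSum + CarryIn) & 0xFF;
          bool WideCarry = (A + B + CarryIn) > 0xFF;
          assert(AddCarrySum == ((A + B + CarryIn) & 0xFF));
          assert((CarryOutX || CarryOutY) == WideCarry);  // OR is the real carry-out
        }
      }
    }
    return 0;
  }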
+ // + // TODO: match other operations that can merge flags (ADD, etc) + DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0)); + if (N->getOpcode() == ISD::AND) + return DAG.getConstant(0, DL, MVT::i1); + return Merged.getValue(1); +} + SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry. @@ -3006,6 +3104,20 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1), N1.getOperand(0))); + // A - (A & B) -> A & (~B) + if (N1.getOpcode() == ISD::AND) { + SDValue A = N1.getOperand(0); + SDValue B = N1.getOperand(1); + if (A != N0) + std::swap(A, B); + if (A == N0 && + (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) { + SDValue InvB = + DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT)); + return DAG.getNode(ISD::AND, DL, VT, A, InvB); + } + } + // fold (X - (-Y * Z)) -> (X + (Y * Z)) if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) { if (N1.getOperand(0).getOpcode() == ISD::SUB && @@ -4225,7 +4337,6 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. unsigned Opcode = N->getOpcode(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegal(Opcode, VT) && (N0.isUndef() || DAG.SignBitIsZero(N0)) && (N1.isUndef() || DAG.SignBitIsZero(N1))) { @@ -4543,8 +4654,8 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC) if (LL == RL && LR == RR) { - ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger) - : ISD::getSetCCOrOperation(CC0, CC1, IsInteger); + ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT) + : ISD::getSetCCOrOperation(CC0, CC1, OpVT); if (NewCC != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) && @@ -4856,7 +4967,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, return true; } -bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { +bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); if (!Mask) return false; @@ -5092,6 +5203,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue Shuffle = XformToShuffleWithZero(N)) return Shuffle; + if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + return Combined; + // fold (and (or x, C), D) -> D if (C & D) == D auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue()); @@ -5238,14 +5352,13 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } - if (Level >= AfterLegalizeTypes) { + if (LegalTypes) { // Attempt to propagate the AND back up to the leaves which, if they're // loads, can be combined to narrow loads and the AND node can be removed. // Perform after legalization so that extend nodes will already be // combined into the loads. 
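Both algebraic folds added to visitSUB and visitXOR above, A - (A & B) --> A & ~B and (not (add X, -1)) --> (neg X), are ordinary two's-complement identities. A quick exhaustive check over 8-bit values, written as ordinary C++ standing in for the DAG nodes:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned I = 0; I <= 0xFF; ++I) {
      for (unsigned J = 0; J <= 0xFF; ++J) {
        uint8_t A = I, B = J;
        // A - (A & B) clears exactly the bits of A that are also set in B.
        assert(uint8_t(A - (A & B)) == uint8_t(A & ~B));
      }
      uint8_t X = I;
      // ~(X + (-1)) == ~(X - 1) == -X in two's complement.
      assert(uint8_t(~uint8_t(X + 0xFF)) == uint8_t(0 - X));
    }
    return 0;
  }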
- if (BackwardsPropagateMask(N, DAG)) { + if (BackwardsPropagateMask(N)) return SDValue(N, 0); - } } if (SDValue Combined = visitANDLike(N0, N1, N)) @@ -5787,6 +5900,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDValue Combined = visitORLike(N0, N1, N)) return Combined; + if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + return Combined; + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) return BSwap; @@ -6418,7 +6534,7 @@ static unsigned BigEndianByteAt(unsigned BW, unsigned i) { // Check if the bytes offsets we are looking at match with either big or // little endian value loaded. Return true for big endian, false for little // endian, and None if match failed. -static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets, +static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, int64_t FirstOffset) { // The endian can be decided only when it is 2 bytes at least. unsigned Width = ByteOffsets.size(); @@ -6491,7 +6607,6 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT)) return SDValue(); @@ -6499,7 +6614,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { // to the same base address. Collect bytes offsets from Base address into // ByteOffsets. SDValue CombinedValue; - SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX); + SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX); int64_t FirstOffset = INT64_MAX; StoreSDNode *FirstStore = nullptr; Optional<BaseIndexOffset> Base; @@ -6655,13 +6770,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); unsigned ByteWidth = VT.getSizeInBits() / 8; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // Before legalize we can introduce too wide illegal loads which will be later - // split into legal sized loads. This enables us to combine i64 load by i8 - // patterns to a couple of i32 loads on 32 bit targets. - if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT)) - return SDValue(); - bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); auto MemoryByteOffset = [&] (ByteProvider P) { assert(P.isMemory() && "Must be a memory byte provider"); @@ -6683,12 +6791,22 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Check if all the bytes of the OR we are looking at are loaded from the same // base address. Collect bytes offsets from Base address in ByteOffsets. - SmallVector<int64_t, 4> ByteOffsets(ByteWidth); - for (unsigned i = 0; i < ByteWidth; i++) { + SmallVector<int64_t, 8> ByteOffsets(ByteWidth); + unsigned ZeroExtendedBytes = 0; + for (int i = ByteWidth - 1; i >= 0; --i) { auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); - if (!P || !P->isMemory()) // All the bytes must be loaded from memory + if (!P) return SDValue(); + if (P->isConstantZero()) { + // It's OK for the N most significant bytes to be 0, we can just + // zero-extend the load. 
+ if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i))) + return SDValue(); + continue; + } + assert(P->isMemory() && "provenance should either be memory or zero"); + LoadSDNode *L = P->Load; assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && !L->isIndexed() && @@ -6727,9 +6845,26 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { assert(Base && "Base address of the accessed memory location must be set"); assert(FirstOffset != INT64_MAX && "First byte offset must be set"); + bool NeedsZext = ZeroExtendedBytes > 0; + + EVT MemVT = + EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8); + + if (!MemVT.isSimple()) + return SDValue(); + + // Before legalize we can introduce too wide illegal loads which will be later + // split into legal sized loads. This enables us to combine i64 load by i8 + // patterns to a couple of i32 loads on 32 bit targets. + if (LegalOperations && + !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, + MemVT)) + return SDValue(); + // Check if the bytes of the OR we are looking at match with either big or // little endian value load - Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); + Optional<bool> IsBigEndian = isBigEndian( + makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); if (!IsBigEndian.hasValue()) return SDValue(); @@ -6742,7 +6877,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { LoadSDNode *FirstLoad = FirstByteProvider->Load; // The node we are looking at matches with the pattern, check if we can - // replace it with a single load and bswap if needed. + // replace it with a single (possibly zero-extended) load and bswap + shift if + // needed. // If the load needs byte swap check if the target supports it bool NeedsBswap = IsBigEndianTarget != *IsBigEndian; @@ -6750,25 +6886,45 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Before legalize we can introduce illegal bswaps which will be later // converted to an explicit bswap sequence. This way we end up with a single // load and byte shuffling instead of several loads and byte shuffling. - if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT)) + // We do not introduce illegal bswaps when zero-extending as this tends to + // introduce too many arithmetic instructions. + if (NeedsBswap && (LegalOperations || NeedsZext) && + !TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + // If we need to bswap and zero extend, we have to insert a shift. Check that + // it is legal. + if (NeedsBswap && NeedsZext && LegalOperations && + !TLI.isOperationLegal(ISD::SHL, VT)) return SDValue(); // Check that a load of the wide type is both allowed and fast on the target bool Fast = false; - bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), - VT, *FirstLoad->getMemOperand(), &Fast); + bool Allowed = + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + *FirstLoad->getMemOperand(), &Fast); if (!Allowed || !Fast) return SDValue(); - SDValue NewLoad = - DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), FirstLoad->getAlignment()); + SDValue NewLoad = DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, + SDLoc(N), VT, Chain, FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), MemVT, + FirstLoad->getAlignment()); // Transfer chain users from old loads to the new load. for (LoadSDNode *L : Loads) DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1)); - return NeedsBswap ? 
DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad; + if (!NeedsBswap) + return NewLoad; + + SDValue ShiftedLoad = + NeedsZext + ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad, + DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT, + SDLoc(N), LegalOperations)) + : NewLoad; + return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad); } // If the target has andn, bsl, or a similar bit-select instruction, @@ -6904,7 +7060,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS, RHS, CC; if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), - LHS.getValueType().isInteger()); + LHS.getValueType()); if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { switch (N0Opcode) { @@ -6964,6 +7120,13 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { DAG.getAllOnesConstant(DL, VT)); } + // fold (not (add X, -1)) -> (neg X) + if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD && + isAllOnesOrAllOnesSplat(N0.getOperand(1))) { + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), + N0.getOperand(0)); + } + // fold (xor (and x, y), y) -> (and (not x), y) if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) { SDValue X = N0.getOperand(0); @@ -7051,6 +7214,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N)) + return Combined; + return SDValue(); } @@ -7567,8 +7733,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (VT.isVector()) ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, VT.getVectorNumElements()); - if ((!LegalOperations || - TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) + if (!LegalOperations || + TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == + TargetLowering::Legal) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), DAG.getValueType(ExtVT)); } @@ -7776,26 +7943,40 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } } - // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) - // TODO - support non-uniform vector shift amounts. if (N1C && N0.getOpcode() == ISD::TRUNCATE && N0.getOperand(0).getOpcode() == ISD::SRL) { - if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) { + SDValue InnerShift = N0.getOperand(0); + // TODO - support non-uniform vector shift amounts. + if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) { uint64_t c1 = N001C->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); - EVT InnerShiftVT = N0.getOperand(0).getValueType(); - EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType(); + EVT InnerShiftVT = InnerShift.getValueType(); + EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType(); uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2))) // This is only valid if the OpSizeInBits + c1 = size of inner shift. 
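The new "(not (add X, -1)) -> (neg X)" fold in visitXOR is just the two's-complement identity ~(X - 1) == -X; a quick standalone sketch of the check:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 7u, 0x80000000u, ~0u})
    assert(~(X - 1u) == 0u - X); // ~(X - 1) = -(X - 1) - 1 = -X
  return 0;
}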
if (c1 + OpSizeInBits == InnerShiftSize) { - SDLoc DL(N0); + SDLoc DL(N); if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, DL, VT); - return DAG.getNode(ISD::TRUNCATE, DL, VT, - DAG.getNode(ISD::SRL, DL, InnerShiftVT, - N0.getOperand(0).getOperand(0), - DAG.getConstant(c1 + c2, DL, - ShiftCountVT))); + SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT); + SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT, + InnerShift.getOperand(0), NewShiftAmt); + return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift); + } + // In the more general case, we can clear the high bits after the shift: + // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask) + if (N0.hasOneUse() && InnerShift.hasOneUse() && + c1 + c2 < InnerShiftSize) { + SDLoc DL(N); + SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT); + SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT, + InnerShift.getOperand(0), NewShiftAmt); + SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize, + OpSizeInBits - c2), + DL, InnerShiftVT); + SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask); + return DAG.getNode(ISD::TRUNCATE, DL, VT, And); } } } @@ -8585,6 +8766,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return Chain; + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -8609,6 +8794,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MLD->getPassThru(), MLD->getChain()); + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + return SDValue(); } @@ -9108,6 +9297,8 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) return SDValue(); + assert(!DstVT.isScalableVector() && "Unexpected scalable vector type"); + SDLoc DL(N); const unsigned NumSplits = DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); @@ -9125,8 +9316,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); - BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Stride, DL, BasePtr.getValueType())); + BasePtr = DAG.getMemBasePlusOffset(BasePtr, Stride, DL); Loads.push_back(SplitLoad.getValue(0)); Chains.push_back(SplitLoad.getValue(1)); @@ -9365,11 +9555,10 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, SDLoc dl(Ld); SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); - SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), - Ld->getBasePtr(), Ld->getMask(), - PassThru, Ld->getMemoryVT(), - Ld->getMemOperand(), ExtLoadType, - Ld->isExpandingLoad()); + SDValue NewLoad = DAG.getMaskedLoad( + VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(), + ExtLoadType, Ld->isExpandingLoad()); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); return NewLoad; } @@ -9397,10 +9586,15 @@ static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) SDLoc DL(N); - SDValue NotX = DAG.getNOT(DL, X, VT); - 
SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); - auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; - return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + unsigned ShCt = VT.getSizeInBits() - 1; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { + SDValue NotX = DAG.getNOT(DL, X, VT); + SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT); + auto ShiftOpcode = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; + return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + } } return SDValue(); } @@ -9671,6 +9865,29 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, return (Known.Zero | 1).isAllOnesValue(); } +/// Given an extending node with a pop-count operand, if the target does not +/// support a pop-count in the narrow source type but does support it in the +/// destination type, widen the pop-count to the destination type. +static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) { + assert((Extend->getOpcode() == ISD::ZERO_EXTEND || + Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op"); + + SDValue CtPop = Extend->getOperand(0); + if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse()) + return SDValue(); + + EVT VT = Extend->getValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) || + !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT)) + return SDValue(); + + // zext (ctpop X) --> ctpop (zext X) + SDLoc DL(Extend); + SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT); + return DAG.getNode(ISD::CTPOP, DL, VT, NewZext); +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -9921,6 +10138,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; + if (SDValue NewCtPop = widenCtPop(N, DAG)) + return NewCtPop; + return SDValue(); } @@ -10067,6 +10287,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SCC; } + if (SDValue NewCtPop = widenCtPop(N, DAG)) + return NewCtPop; + return SDValue(); } @@ -10273,17 +10496,14 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (DAG.getDataLayout().isBigEndian()) ShAmt = AdjustBigEndianShift(ShAmt); - EVT PtrType = N0.getOperand(1).getValueType(); uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); // The original load itself didn't wrap, so an offset within it doesn't. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); - SDValue NewPtr = DAG.getNode(ISD::ADD, DL, - PtrType, LN0->getBasePtr(), - DAG.getConstant(PtrOff, DL, PtrType), - Flags); + SDValue NewPtr = + DAG.getMemBasePlusOffset(LN0->getBasePtr(), PtrOff, DL, Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -10735,16 +10955,16 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // e.g. 
trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { SDValue VecSrc = N0.getOperand(0); - EVT SrcVT = VecSrc.getValueType(); - if (SrcVT.isVector() && SrcVT.getScalarType() == VT && + EVT VecSrcVT = VecSrc.getValueType(); + if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT && (!LegalOperations || - TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) { + TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) { SDLoc SL(N); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); - unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1; - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, - VecSrc, DAG.getConstant(Idx, SL, IdxVT)); + unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1; + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc, + DAG.getConstant(Idx, SL, IdxVT)); } } @@ -11299,11 +11519,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = - TLI.isFMAFasterThanFMulAndFAdd(VT) && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. @@ -11359,7 +11579,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (isContractableFMUL(N00) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), @@ -11373,7 +11594,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (isContractableFMUL(N10) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N10.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)), @@ -11427,7 +11649,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); if (isContractableFMUL(N020) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N020.getValueType())) { return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), N020.getOperand(0), N020.getOperand(1), N1, Flags); @@ -11456,7 +11679,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N00.getOpcode() == PreferredFusedOpcode) { SDValue N002 = N00.getOperand(2); if (isContractableFMUL(N002) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), N002.getOperand(0), N002.getOperand(1), N1, Flags); @@ -11471,7 +11695,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N12.getOpcode() == ISD::FP_EXTEND) { SDValue N120 = N12.getOperand(0); if (isContractableFMUL(N120) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, 
N120.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N120.getValueType())) { return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), N120.getOperand(0), N120.getOperand(1), N0, Flags); @@ -11489,7 +11714,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (N10.getOpcode() == PreferredFusedOpcode) { SDValue N102 = N10.getOperand(2); if (isContractableFMUL(N102) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N10.getValueType())) { return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), N102.getOperand(0), N102.getOperand(1), N0, Flags); @@ -11510,11 +11736,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = - TLI.isFMAFasterThanFMulAndFAdd(VT) && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. @@ -11579,7 +11805,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); if (isContractableFMUL(N00) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), @@ -11595,7 +11822,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); if (isContractableFMUL(N10) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N10.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, @@ -11617,7 +11845,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N00.getOpcode() == ISD::FNEG) { SDValue N000 = N00.getOperand(0); if (isContractableFMUL(N000) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, @@ -11640,7 +11869,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N00.getOpcode() == ISD::FP_EXTEND) { SDValue N000 = N00.getOperand(0); if (isContractableFMUL(N000) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N000.getValueType())) { return DAG.getNode(ISD::FNEG, SL, VT, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, @@ -11671,7 +11901,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && - isContractableFMUL(N1.getOperand(2))) { + isContractableFMUL(N1.getOperand(2)) && + N1->hasOneUse()) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); return 
DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11686,12 +11917,14 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode) { + if (N0.getOpcode() == PreferredFusedOpcode && + N0->hasOneUse()) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); if (isContractableFMUL(N020) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N020.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11716,7 +11949,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (N00.getOpcode() == PreferredFusedOpcode) { SDValue N002 = N00.getOperand(2); if (isContractableFMUL(N002) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), @@ -11736,10 +11970,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) if (N1.getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && + N1->hasOneUse()) { SDValue N120 = N1.getOperand(2).getOperand(0); if (isContractableFMUL(N120) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + N120.getValueType())) { SDValue N1200 = N120.getOperand(0); SDValue N1201 = N120.getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11768,7 +12004,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDValue N101 = CvtSrc.getOperand(1); SDValue N102 = CvtSrc.getOperand(2); if (isContractableFMUL(N102) && - TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) { + TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, + CvtSrc.getValueType())) { SDValue N1020 = N102.getOperand(0); SDValue N1021 = N102.getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -11812,7 +12049,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // Floating-point multiply-add without intermediate rounding. bool HasFMA = (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // Floating-point multiply-add with intermediate rounding. This can result @@ -12402,6 +12639,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } } + // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z)) + // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z)) + if (!TLI.isFNegFree(VT) && + TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations, + ForCodeSize) == 2) + return DAG.getNode(ISD::FNEG, DL, VT, + TLI.getNegatedExpression(SDValue(N, 0), DAG, + LegalOperations, ForCodeSize), + Flags); return SDValue(); } @@ -12738,7 +12984,7 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { // Assume that libcalls are the smallest code. // TODO: This restriction should probably be lifted for vectors. 
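A sketch of the arithmetic behind the new visitFMA fold: fma(-X, Y, -Z) and fma(X, -Y, -Z) both equal -fma(X, Y, Z) exactly, because the fused operation rounds -(X*Y + Z) once and rounding commutes with negation, so pushing the FNEG outward is safe when it is profitable.

#include <cassert>
#include <cmath>

int main() {
  double X = 1.5, Y = -2.25, Z = 0.75;
  assert(std::fma(-X, Y, -Z) == -std::fma(X, Y, Z));
  assert(std::fma(X, -Y, -Z) == -std::fma(X, Y, Z));
  return 0;
}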
- if (DAG.getMachineFunction().getFunction().hasOptSize()) + if (ForCodeSize) return SDValue(); // pow(X, 0.25) --> sqrt(sqrt(X)) @@ -13135,6 +13381,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 FIXME: This is + // duplicated in isNegatibleForFree, but isNegatibleForFree doesn't know it + // was called from a context with a nsz flag if the input fsub does not. + if (N0.getOpcode() == ISD::FSUB && + (DAG.getTarget().Options.NoSignedZerosFPMath || + N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) { + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1), + N0.getOperand(0), N->getFlags()); + } + // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. if (!TLI.isFNegFree(VT) && @@ -13168,9 +13424,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (CFP1) { APFloat CVal = CFP1->getValueAPF(); CVal.changeSign(); - if (Level >= AfterLegalizeDAG && - (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || - TLI.isOperationLegal(ISD::ConstantFP, VT))) + if (LegalDAG && (TLI.isFPImmLegal(CVal, VT, ForCodeSize) || + TLI.isOperationLegal(ISD::ConstantFP, VT))) return DAG.getNode( ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)), @@ -13423,12 +13678,22 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, EVT VT; unsigned AS; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { if (LD->isIndexed() || LD->getBasePtr().getNode() != N) return false; VT = LD->getMemoryVT(); AS = LD->getAddressSpace(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) { if (ST->isIndexed() || ST->getBasePtr().getNode() != N) return false; VT = ST->getMemoryVT(); @@ -13462,38 +13727,64 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, VT.getTypeForEVT(*DAG.getContext()), AS); } -/// Try turning a load/store into a pre-indexed load/store when the base -/// pointer is an add or subtract and it has other uses besides the load/store. -/// After the transformation, the new indexed load/store has effectively folded -/// the add/subtract in and all of its other uses are redirected to the -/// new load/store. 
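As a rough illustration of the addressing pattern the pre-/post-indexed combines (now extended to masked loads and stores) are after, assuming a target with writeback addressing such as AArch64 or ARM:

#include <cstdint>

// The pointer update and the dereference can fold into one pre-indexed load:
// the add feeds both the memory access and the other use of the pointer.
int32_t load_preinc(int32_t **pp) {
  int32_t *p = *pp + 1;
  *pp = p;      // other use of the updated base pointer
  return *p;    // becomes a single load with pre-increment writeback
}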
-bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (Level < AfterLegalizeDAG) - return false; - - bool isLoad = true; - SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { +static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec, + bool &IsLoad, bool &IsMasked, SDValue &Ptr, + const TargetLowering &TLI) { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { if (LD->isIndexed()) return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT)) return false; Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { if (ST->isIndexed()) return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT)) return false; Ptr = ST->getBasePtr(); - isLoad = false; + IsLoad = false; + } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + EVT VT = LD->getMemoryVT(); + if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) && + !TLI.isIndexedMaskedLoadLegal(Dec, VT)) + return false; + Ptr = LD->getBasePtr(); + IsMasked = true; + } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + EVT VT = ST->getMemoryVT(); + if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) && + !TLI.isIndexedMaskedStoreLegal(Dec, VT)) + return false; + Ptr = ST->getBasePtr(); + IsLoad = false; + IsMasked = true; } else { return false; } + return true; +} + +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. +/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + bool IsLoad = true; + bool IsMasked = false; + SDValue Ptr; + if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked, + Ptr, TLI)) + return false; // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail // out. There is no reason to make this a preinc/predec. @@ -13535,8 +13826,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; // Check #2. - if (!isLoad) { - SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (!IsLoad) { + SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue() + : cast<StoreSDNode>(N)->getValue(); // Would require a copy. 
if (Val == BasePtr) @@ -13612,18 +13904,26 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; SDValue Result; - if (isLoad) - Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); - else - Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); + if (!IsMasked) { + if (IsLoad) + Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); + else + Result = + DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); + } else { + if (IsLoad) + Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM); + else + Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM); + } ++PreIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); - if (isLoad) { + if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { @@ -13677,7 +13977,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // We can now generate the new expression. SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); - SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); + SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, DL, @@ -13687,7 +13987,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Replace the uses of Ptr with uses of the updated base value. - DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0)); deleteAndRecombine(Ptr.getNode()); AddToWorklist(Result.getNode()); @@ -13702,29 +14002,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; - bool isLoad = true; + bool IsLoad = true; + bool IsMasked = false; SDValue Ptr; - EVT VT; - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - if (LD->isIndexed()) - return false; - VT = LD->getMemoryVT(); - if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && - !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) - return false; - Ptr = LD->getBasePtr(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - if (ST->isIndexed()) - return false; - VT = ST->getMemoryVT(); - if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && - !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) - return false; - Ptr = ST->getBasePtr(); - isLoad = false; - } else { + if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, + Ptr, TLI)) return false; - } if (Ptr.getNode()->hasOneUse()) return false; @@ -13760,7 +14043,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // If all the uses are load / store addresses, then don't do the // transformation. - if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) { bool RealUse = false; for (SDNode *UseUse : Use->uses()) { if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) @@ -13786,18 +14069,24 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { Worklist.push_back(Op); if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) { - SDValue Result = isLoad - ? 
DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM) - : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), - BasePtr, Offset, AM); + SDValue Result; + if (!IsMasked) + Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM) + : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); + else + Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM) + : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); - if (isLoad) { + if (IsLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); } else { @@ -13809,7 +14098,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), - Result.getValue(isLoad ? 1 : 0)); + Result.getValue(IsLoad ? 1 : 0)); deleteAndRecombine(Op); return true; } @@ -13923,8 +14212,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // the stored value). With Offset=n (for n > 0) the loaded value starts at the // n:th least significant byte of the stored value. if (DAG.getDataLayout().isBigEndian()) - Offset = (STMemType.getStoreSizeInBits() - - LDMemType.getStoreSizeInBits()) / 8 - Offset; + Offset = ((int64_t)STMemType.getStoreSizeInBits() - + (int64_t)LDMemType.getStoreSizeInBits()) / 8 - Offset; // Check that the stored value cover all bits that are loaded. bool STCoversLD = @@ -14066,7 +14355,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { return V; // Try to infer better alignment information than the load already has. - if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) { SDValue NewLoad = DAG.getExtLoad( @@ -14786,8 +15075,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue Ptr = St->getBasePtr(); if (StOffset) { SDLoc DL(IVal); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), - Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, StOffset, DL); NewAlign = MinAlign(NewAlign, StOffset); } @@ -14898,10 +15186,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) return SDValue(); - SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), - Ptr.getValueType(), Ptr, - DAG.getConstant(PtrOff, SDLoc(LD), - Ptr.getValueType())); + SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD)); SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), NewAlign, @@ -15081,7 +15366,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // The latest Node in the DAG. SDLoc DL(StoreNodes[0].MemNode); - int64_t ElementSizeBits = MemVT.getStoreSizeInBits(); + TypeSize ElementSizeBits = MemVT.getStoreSizeInBits(); unsigned SizeInBits = NumStores * ElementSizeBits; unsigned NumMemElts = MemVT.isVector() ? 
MemVT.getVectorNumElements() : 1; @@ -15466,7 +15751,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { Attribute::NoImplicitFloat); // This function cannot currently deal with non-byte-sized memory sizes. - if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) + if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) return false; if (!MemVT.isSimple()) @@ -16015,6 +16300,9 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { if (Value.getOpcode() == ISD::TargetConstantFP) return SDValue(); + if (!ISD::isNormalStore(ST)) + return SDValue(); + SDLoc DL(ST); SDValue Chain = ST->getChain(); @@ -16075,8 +16363,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getAlignment(), MMOFlags, AAInfo); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(4, DL, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, 4, DL); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), @@ -16111,8 +16398,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG, *ST->getMemOperand())) { return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, - ST->getPointerInfo(), ST->getAlignment(), - ST->getMemOperand()->getFlags(), ST->getAAInfo()); + ST->getMemOperand()); } } @@ -16121,7 +16407,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return Chain; // Try to infer better alignment information than the store already has. - if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) { SDValue NewStore = @@ -16451,9 +16737,7 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { // Lower value store. SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getAlignment(), MMOFlags, AAInfo); - Ptr = - DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, HalfValBitSize / 8, DL); // Higher value store. SDValue St1 = DAG.getStore(St0, DL, Hi, Ptr, @@ -16464,11 +16748,15 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { /// Convert a disguised subvector insertion into a shuffle: SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { + assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && + "Expected extract_vector_elt"); SDValue InsertVal = N->getOperand(1); SDValue Vec = N->getOperand(0); - // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) - // --> (vector_shuffle X, Y) + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), + // InsIndex) + // --> (vector_shuffle X, Y) and variations where shuffle operands may be + // CONCAT_VECTORS. if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isa<ConstantSDNode>(InsertVal.getOperand(1))) { @@ -16481,18 +16769,47 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { // Vec's operand 0 is using indices from 0 to N-1 and // operand 1 from N to 2N - 1, where N is the number of // elements in the vectors. 
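Several hunks above replace hand-rolled pointer arithmetic with DAG.getMemBasePlusOffset. A minimal sketch of what that helper amounts to (the real one lives in SelectionDAG and also has an overload taking SDNodeFlags, as used in ReduceLoadWidth):

static SDValue memBasePlusOffsetSketch(SelectionDAG &DAG, SDValue Ptr,
                                       int64_t Offset, const SDLoc &DL) {
  EVT PtrVT = Ptr.getValueType();
  return DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
                     DAG.getConstant(Offset, DL, PtrVT));
}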
- int XOffset = -1; - if (InsertVal.getOperand(0) == X) { - XOffset = 0; - } else if (InsertVal.getOperand(0) == Y) { - XOffset = X.getValueType().getVectorNumElements(); + SDValue InsertVal0 = InsertVal.getOperand(0); + int ElementOffset = -1; + + // We explore the inputs of the shuffle in order to see if we find the + // source of the extract_vector_elt. If so, we can use it to modify the + // shuffle rather than perform an insert_vector_elt. + SmallVector<std::pair<int, SDValue>, 8> ArgWorkList; + ArgWorkList.emplace_back(Mask.size(), Y); + ArgWorkList.emplace_back(0, X); + + while (!ArgWorkList.empty()) { + int ArgOffset; + SDValue ArgVal; + std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val(); + + if (ArgVal == InsertVal0) { + ElementOffset = ArgOffset; + break; + } + + // Peek through concat_vector. + if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) { + int CurrentArgOffset = + ArgOffset + ArgVal.getValueType().getVectorNumElements(); + int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements(); + for (SDValue Op : reverse(ArgVal->ops())) { + CurrentArgOffset -= Step; + ArgWorkList.emplace_back(CurrentArgOffset, Op); + } + + // Make sure we went through all the elements and did not screw up index + // computation. + assert(CurrentArgOffset == ArgOffset); + } } - if (XOffset != -1) { + if (ElementOffset != -1) { SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1)); - NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue(); assert(NewMask[InsIndex] < (int)(2 * Vec.getValueType().getVectorNumElements()) && NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); @@ -16562,13 +16879,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue EltNo = N->getOperand(2); SDLoc DL(N); - // If the inserted element is an UNDEF, just use the input vector. - if (InVal.isUndef()) - return InVec; - EVT VT = InVec.getValueType(); unsigned NumElts = VT.getVectorNumElements(); + // Insert into out-of-bounds element is undefined. + if (auto *IndexC = dyn_cast<ConstantSDNode>(EltNo)) + if (IndexC->getZExtValue() >= VT.getVectorNumElements()) + return DAG.getUNDEF(VT); + // Remove redundant insertions: // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && @@ -16683,7 +17001,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, // operand can't represent this new access since the offset is variable. MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); } - NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); + NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL); // The replacement we need to do here is a little tricky: we need to // replace an extractelement of a load with a load. @@ -16723,8 +17041,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, AddToWorklist(EVE); // Since we're explicitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. 
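A worked example of the mask rewrite above, assuming 4-element shuffle operands: for V = shuffle(X, Y, <0,5,2,7>) and an insert of extract_vector_elt(Y, 1) at position 2, the worklist finds Y at offset Mask.size() = 4, so the insert collapses to shuffle(X, Y, <0,5,5,7>).

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> Mask{0, 5, 2, 7};
  int ElementOffsetOfY = (int)Mask.size(); // operand 1's lanes start at index 4
  int ExtrIndex = 1, InsIndex = 2;
  Mask[InsIndex] = ElementOffsetOfY + ExtrIndex;
  assert((Mask == std::array<int, 4>{0, 5, 5, 7}));
  return 0;
}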
- AddUsersToWorklist(Load.getNode()); // Add users too - AddToWorklist(Load.getNode()); + AddToWorklistWithUsers(Load.getNode()); ++OpsNarrowed; return SDValue(EVE, 0); } @@ -18239,22 +18556,61 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { return DAG.getBitcast(NVT, NewExtract); } } - // TODO - handle (DestNumElts % SrcNumElts) == 0 + if ((DestNumElts % SrcNumElts) == 0) { + unsigned DestSrcRatio = DestNumElts / SrcNumElts; + if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) { + unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio; + EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getScalarType(), NewExtNumElts); + if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 && + TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { + unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio; + SDLoc DL(N); + SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL); + SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, + V.getOperand(0), NewIndex); + return DAG.getBitcast(NVT, NewExtract); + } + } + } } - // Combine: - // (extract_subvec (concat V1, V2, ...), i) - // Into: - // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same - // type. - if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) && - V.getOperand(0).getValueType() == NVT) { - unsigned Idx = N->getConstantOperandVal(1); - unsigned NumElems = NVT.getVectorNumElements(); - assert((Idx % NumElems) == 0 && - "IDX in concat is not a multiple of the result vector length."); - return V->getOperand(Idx / NumElems); + if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) { + EVT ConcatSrcVT = V.getOperand(0).getValueType(); + assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() && + "Concat and extract subvector do not change element type"); + + unsigned ExtIdx = N->getConstantOperandVal(1); + unsigned ExtNumElts = NVT.getVectorNumElements(); + assert(ExtIdx % ExtNumElts == 0 && + "Extract index is not a multiple of the input vector length."); + + unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements(); + unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts; + + // If the concatenated source types match this extract, it's a direct + // simplification: + // extract_subvec (concat V1, V2, ...), i --> Vi + if (ConcatSrcNumElts == ExtNumElts) + return V.getOperand(ConcatOpIdx); + + // If the concatenated source vectors are a multiple length of this extract, + // then extract a fraction of one of those source vectors directly from a + // concat operand. 
Example: + // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 --> + // v2i8 extract_subvec v8i8 Y, 6 + if (ConcatSrcNumElts % ExtNumElts == 0) { + SDLoc DL(N); + unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; + assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && + "Trying to extract from >1 concat operand?"); + assert(NewExtIdx % ExtNumElts == 0 && + "Extract index is not a multiple of the input vector length."); + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue NewIndexC = DAG.getConstant(NewExtIdx, DL, IdxTy); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, + V.getOperand(ConcatOpIdx), NewIndexC); + } } V = peekThroughBitcasts(V); @@ -18962,6 +19318,30 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } + // A shuffle of a concat of the same narrow vector can be reduced to use + // only low-half elements of a concat with undef: + // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask' + if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() && + N0.getNumOperands() == 2 && + N0.getOperand(0) == N0.getOperand(1)) { + int HalfNumElts = (int)NumElts / 2; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= HalfNumElts) { + assert(Idx < (int)NumElts && "Shuffle mask chooses undef op"); + Idx -= HalfNumElts; + } + NewMask.push_back(Idx); + } + if (TLI.isShuffleMaskLegal(NewMask, VT)) { + SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType()); + SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N0.getOperand(0), UndefVec); + return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask); + } + } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) @@ -19446,8 +19826,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { int EltIdx = i / Split; int SubIdx = i % Split; SDValue Elt = RHS.getOperand(EltIdx); + // X & undef --> 0 (not undef). So this lane must be converted to choose + // from the zero constant vector (same as if the element had all 0-bits). if (Elt.isUndef()) { - Indices.push_back(-1); + Indices.push_back(i + NumSubElts); continue; } @@ -19460,14 +19842,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { return SDValue(); // Extract the sub element from the constant bit mask. 
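Plugging the numbers from the comment's example into the new index arithmetic, as a quick sanity check: extracting a v2i8 at index 14 out of concat(v8i8 X, v8i8 Y) lands entirely in operand 1, at offset 6.

#include <cassert>

int main() {
  unsigned ExtIdx = 14, ExtNumElts = 2, ConcatSrcNumElts = 8;
  unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;             // 1 -> operand Y
  unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; // 6
  assert(ConcatOpIdx == 1 && NewExtIdx == 6);
  assert(NewExtIdx % ExtNumElts == 0 &&
         NewExtIdx + ExtNumElts <= ConcatSrcNumElts);
  return 0;
}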
- if (DAG.getDataLayout().isBigEndian()) { - Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits); - } else { - Bits.lshrInPlace(SubIdx * NumSubBits); - } - - if (Split > 1) - Bits = Bits.trunc(NumSubBits); + if (DAG.getDataLayout().isBigEndian()) + Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits); + else + Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits); if (Bits.isAllOnesValue()) Indices.push_back(i); @@ -19910,22 +20288,28 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1; - SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); - SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); - AddToWorklist(Shift.getNode()); - - if (XType.bitsGT(AType)) { - Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) { + SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); + SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); - } - if (CC == ISD::SETGT) - Shift = DAG.getNOT(DL, Shift, AType); + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } - return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + if (CC == ISD::SETGT) + Shift = DAG.getNOT(DL, Shift, AType); + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } } - SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy); + unsigned ShCt = XType.getSizeInBits() - 1; + if (TLI.shouldAvoidTransformToShift(XType, ShCt)) + return SDValue(); + + SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy); SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt); AddToWorklist(Shift.getNode()); @@ -20035,31 +20419,29 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. - // TODO: The operation legality checks could be loosened to include "custom", - // but that may cause regressions for targets that do not have shift - // instructions. if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) && - TLI.isOperationLegal(ISD::SHL, VT) && - TLI.isOperationLegal(ISD::SRA, VT)) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { SDValue AndLHS = N0->getOperand(0); auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { // Shift the tested bit over the sign bit. const APInt &AndMask = ConstAndRHS->getAPIntValue(); - SDValue ShlAmt = - DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), - getShiftAmountTy(AndLHS.getValueType())); - SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); - - // Now arithmetic right shift it all the way over, so the result is either - // all-ones, or zero. 
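The shift sequence referred to above materializes a single-bit test as an all-ones or all-zero value; in scalar C++ terms, assuming 32-bit integers and a tested bit K:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned K = 2; // AndMask = 1 << K, so countLeadingZeros(AndMask) = 31 - K
  for (uint32_t X : {0u, 4u, 0xFFFFFFFBu, 0xFFFFFFFFu}) {
    int32_t AllOnesOrZero = (int32_t)(X << (31 - K)) >> 31; // shl, then sra
    assert(AllOnesOrZero == ((X & (1u << K)) ? -1 : 0));
  }
  return 0;
}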
- SDValue ShrAmt = - DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), - getShiftAmountTy(Shl.getValueType())); - SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); - - return DAG.getNode(ISD::AND, DL, VT, Shr, N3); + unsigned ShCt = AndMask.getBitWidth() - 1; + if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) { + SDValue ShlAmt = + DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), + getShiftAmountTy(AndLHS.getValueType())); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); + + // Now arithmetic right shift it all the way over, so the result is + // either all-ones, or zero. + SDValue ShrAmt = + DAG.getConstant(ShCt, SDLoc(Shl), + getShiftAmountTy(Shl.getValueType())); + SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); + + return DAG.getNode(ISD::AND, DL, VT, Shr, N3); + } } } @@ -20073,7 +20455,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) { if (Swap) { - CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger()); + CC = ISD::getSetCCInverse(CC, CmpOpVT); std::swap(N2C, N3C); } @@ -20101,10 +20483,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (N2C->isOne()) return Temp; + unsigned ShCt = N2C->getAPIntValue().logBase2(); + if (TLI.shouldAvoidTransformToShift(VT, ShCt)) + return SDValue(); + // shl setcc result by log2 n2c return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - SDLoc(Temp), + DAG.getConstant(ShCt, SDLoc(Temp), getShiftAmountTy(Temp.getValueType()))); } @@ -20237,7 +20622,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// Result = N X_i + X_i (N - N A X_i) SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags) { - if (Level >= AfterLegalizeDAG) + if (LegalDAG) return SDValue(); // TODO: Handle half and/or extended types? @@ -20376,7 +20761,7 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, /// Op can be zero. SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Reciprocal) { - if (Level >= AfterLegalizeDAG) + if (LegalDAG) return SDValue(); // TODO: Handle half and/or extended types? @@ -20411,9 +20796,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, SDLoc DL(Op); EVT CCVT = getSetCCResultType(VT); ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; - const Function &F = DAG.getMachineFunction().getFunction(); - Attribute Denorms = F.getFnAttribute("denormal-fp-math"); - if (Denorms.getValueAsString().equals("ieee")) { + DenormalMode DenormMode = DAG.getDenormalMode(VT); + if (DenormMode == DenormalMode::IEEE) { // fabs(X) < SmallestNormal ? 0.0 : Est const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 6d7260d7aee5..2bec8613e79c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -410,8 +410,8 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) { else if (isa<ConstantPointerNull>(V)) // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. 
- Reg = getRegForValue( - Constant::getNullValue(DL.getIntPtrType(V->getContext()))); + Reg = + getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getType()))); else if (const auto *CF = dyn_cast<ConstantFP>(V)) { if (CF->isNullValue()) Reg = fastMaterializeFloatZero(CF); @@ -1190,6 +1190,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { Flags.setSwiftSelf(); if (Arg.IsSwiftError) Flags.setSwiftError(); + if (Arg.IsCFGuardTarget) + Flags.setCFGuardTarget(); if (Arg.IsByVal) Flags.setByVal(); if (Arg.IsInAlloca) { @@ -1236,10 +1238,9 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); // Set labels for heapallocsite call. - if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) { - const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); - MF->addCodeViewHeapAllocSite(CLI.Call, MD); - } + if (CLI.CS) + if (MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite")) + CLI.Call->setHeapAllocMarker(*MF, MD); return true; } @@ -1275,6 +1276,10 @@ bool FastISel::lowerCall(const CallInst *CI) { bool IsTailCall = CI->isTailCall(); if (IsTailCall && !isInTailCallPosition(CS, TM)) IsTailCall = false; + if (IsTailCall && MF->getFunction() + .getFnAttribute("disable-tail-calls") + .getValueAsString() == "true") + IsTailCall = false; CallLoweringInfo CLI; CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) @@ -1926,7 +1931,8 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo, TII(*MF->getSubtarget().getInstrInfo()), TLI(*MF->getSubtarget().getTargetLowering()), TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), - SkipTargetIndependentISel(SkipTargetIndependentISel) {} + SkipTargetIndependentISel(SkipTargetIndependentISel), + LastLocalValue(nullptr), EmitStartPt(nullptr) {} FastISel::~FastISel() = default; diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index cf6711adad48..fa33400cd4b3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -144,7 +144,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (AI->isStaticAlloca() && (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); - uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); + uint64_t TySize = + MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize(); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. @@ -159,6 +160,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI); } + // Scalable vectors may need a special StackID to distinguish + // them from other (fixed size) stack objects. + if (Ty->isVectorTy() && Ty->getVectorIsScalable()) + MF->getFrameInfo().setStackID(FrameIndex, + TFI->getStackIDForScalableVectors()); + StaticAllocaMap[AI] = FrameIndex; // Update the catch handler information. 
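A minimal sketch of the attribute test FastISel::lowerCall now performs, mirroring the check the SelectionDAG path already made: tail calls are suppressed when the caller carries the "disable-tail-calls"="true" function attribute.

#include "llvm/IR/Function.h"

static bool tailCallsDisabled(const llvm::Function &F) {
  return F.getFnAttribute("disable-tail-calls").getValueAsString() == "true";
}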
if (Iter != CatchObjects.end()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c5095995ec2e..c613c2540628 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -882,8 +882,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, if (Flags.hasExact()) MI->setFlag(MachineInstr::MIFlag::IsExact); - if (Flags.hasFPExcept()) - MI->setFlag(MachineInstr::MIFlag::FPExcept); + if (Flags.hasNoFPExcept()) + MI->setFlag(MachineInstr::MIFlag::NoFPExcept); } // Emit all of the actual operands of this instruction, adding them to the diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f9fdf525240f..80ac8b95e4ef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -134,26 +134,27 @@ private: ArrayRef<int> Mask) const; bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, - bool &NeedInvert, const SDLoc &dl); + bool &NeedInvert, const SDLoc &dl, SDValue &Chain, + bool IsSignaling = false); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); - std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, bool isSigned); - SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128); + void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); - SDValue ExpandArgFPLibCall(SDNode *Node, - RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128); + void ExpandArgFPLibCall(SDNode *Node, + RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -172,12 +173,11 @@ private: SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; - SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, - const SDLoc &dl); - SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, - const SDLoc &dl); - SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, - const SDLoc &dl); + SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain); + void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl, + SmallVectorImpl<SDValue> &Results); + void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl, + SmallVectorImpl<SDValue> &Results); SDValue ExpandBITREVERSE(SDValue Op, const SDLoc &dl); SDValue ExpandBSWAP(SDValue Op, const SDLoc &dl); @@ -421,6 +421,9 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + if (!ISD::isNormalStore(ST)) + return SDValue(); + LLVM_DEBUG(dbgs() << "Optimizing float store operations\n"); // Turn 'store float 1.0, Ptr' -> 
'store int 0x12345678, Ptr' // FIXME: We shouldn't do this for TargetConstantFP's. @@ -466,8 +469,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, MMOFlags, AAInfo); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(4, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), MinAlign(Alignment, 4U), MMOFlags, AAInfo); @@ -577,9 +579,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, @@ -793,9 +793,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, @@ -824,9 +822,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, @@ -1013,6 +1009,18 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These pseudo-ops are the same as the other STRICT_ ops except + // they are registered with setOperationAction() using the input type + // instead of the output type. + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()); + break; case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); @@ -1023,11 +1031,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOperand(2).getValueType()); break; case ISD::SELECT_CC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::STRICT_FSETCC ? 3 : + Node->getOpcode() == ISD::STRICT_FSETCCS ? 3 : Node->getOpcode() == ISD::SETCC ? 2 : 1; - unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : + Node->getOpcode() == ISD::STRICT_FSETCC ? 1 : + Node->getOpcode() == ISD::STRICT_FSETCCS ? 
1 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); @@ -1105,16 +1119,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; - case ISD::STRICT_LRINT: - case ISD::STRICT_LLRINT: - case ISD::STRICT_LROUND: - case ISD::STRICT_LLROUND: - // These pseudo-ops are the same as the other STRICT_ ops except - // they are registered with setOperationAction() using the input type - // instead of the output type. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getOperand(1).getValueType()); - break; case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -1125,7 +1129,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: - case ISD::UMULFIXSAT: { + case ISD::UMULFIXSAT: + case ISD::SDIVFIX: + case ISD::UDIVFIX: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1408,7 +1414,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { unsigned Offset = TypeByteSize*i; SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); - Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); + Idx = DAG.getMemBasePlusOffset(FIPtr, Idx, dl); // If the destination vector element type is narrower than the source // element type, only store the bits necessary. @@ -1471,8 +1477,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, } else { // Advance the pointer so that the loaded byte will contain the sign bit. unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; - IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, - DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); + IntPtr = DAG.getMemBasePlusOffset(StackPtr, ByteOffset, DL); State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, ByteOffset); } @@ -1629,10 +1634,9 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// of a true/false result. /// /// \returns true if the SetCC has been legalized, false if it hasn't. -bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, - SDValue &RHS, SDValue &CC, - bool &NeedInvert, - const SDLoc &dl) { +bool SelectionDAGLegalize::LegalizeSetCCCondCode( + EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, bool &NeedInvert, + const SDLoc &dl, SDValue &Chain, bool IsSignaling) { MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; @@ -1650,7 +1654,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, } // Swapping operands didn't work. Try inverting the condition. bool NeedSwap = false; - InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); + InvCC = getSetCCInverse(CCCode, OpVT); if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { // If inverting the condition is not enough, try swapping operands // on top of it. @@ -1715,13 +1719,16 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). 
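For reference, the CCOperand indices computed earlier in this hunk follow directly from the operand layouts of the compare-carrying nodes; a hypothetical helper spelling them out (not from the patch):

    static unsigned getCondCodeOperandIdx(unsigned Opcode) {
      switch (Opcode) {
      case ISD::SELECT_CC:      return 4; // (LHS, RHS, TrueV, FalseV, CC)
      case ISD::STRICT_FSETCC:
      case ISD::STRICT_FSETCCS: return 3; // (Chain, LHS, RHS, CC)
      case ISD::SETCC:          return 2; // (LHS, RHS, CC)
      default:                  return 1; // BR_CC: (Chain, CC, LHS, RHS, Dest)
      }
    }
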
- SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2); + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); } + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), + SetCC2.getValue(1)); LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); RHS = SDValue(); CC = SDValue(); @@ -2077,52 +2084,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } -// Expand a node into a call to a libcall. Similar to -// ExpandLibCall except that the first operand is the in-chain. -std::pair<SDValue, SDValue> -SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, - bool isSigned) { - SDValue InChain = Node->getOperand(0); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { - EVT ArgVT = Node->getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Node->getOperand(i); - Entry.Ty = ArgTy; - Entry.IsSExt = isSigned; - Entry.IsZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); - - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(SDLoc(Node)) - .setChain(InChain) - .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, - std::move(Args)) - .setSExtResult(isSigned) - .setZExtResult(!isSigned); - - std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo; -} - -SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, - RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128) { - if (Node->isStrictFPOpcode()) - Node = DAG.mutateStrictFPToFP(Node); - +void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2132,7 +2100,22 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, case MVT::f128: LC = Call_F128; break; case MVT::ppcf128: LC = Call_PPCF128; break; } - return ExpandLibCall(LC, Node, false); + + if (Node->isStrictFPOpcode()) { + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + // FIXME: This doesn't support tail calls. 
+ std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Ops, CallOptions, + SDLoc(Node), + Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } else { + SDValue Tmp = ExpandLibCall(LC, Node, false); + Results.push_back(Tmp); + } } SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, @@ -2155,17 +2138,17 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, /// Expand the node to a libcall based on first argument type (for instance /// lround and its variant). -SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, - RTLIB::Libcall Call_F32, - RTLIB::Libcall Call_F64, - RTLIB::Libcall Call_F80, - RTLIB::Libcall Call_F128, - RTLIB::Libcall Call_PPCF128) { - if (Node->isStrictFPOpcode()) - Node = DAG.mutateStrictFPToFP(Node); +void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128, + SmallVectorImpl<SDValue> &Results) { + EVT InVT = Node->getOperand(Node->isStrictFPOpcode() ? 1 : 0).getValueType(); RTLIB::Libcall LC; - switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { + switch (InVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; @@ -2174,7 +2157,21 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, case MVT::ppcf128: LC = Call_PPCF128; break; } - return ExpandLibCall(LC, Node, false); + if (Node->isStrictFPOpcode()) { + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + // FIXME: This doesn't support tail calls. + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Ops, CallOptions, + SDLoc(Node), + Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } else { + SDValue Tmp = ExpandLibCall(LC, Node, false); + Results.push_back(Tmp); + } } /// Issue libcalls to __{u}divmod to compute div / rem pairs. @@ -2344,9 +2341,14 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are /// legal for the target. -SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, - EVT DestVT, - const SDLoc &dl) { +SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, + SDValue &Chain) { + bool isSigned = (Node->getOpcode() == ISD::STRICT_SINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP); + EVT DestVT = Node->getValueType(0); + SDLoc dl(Node); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Op0 = Node->getOperand(OpNo); EVT SrcVT = Op0.getValueType(); // TODO: Should any fast-math-flags be set for the created nodes? @@ -2393,16 +2395,39 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, BitsToDouble(0x4330000080000000ULL) : BitsToDouble(0x4330000000000000ULL), dl, MVT::f64); - // subtract the bias - SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); - // final result - SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT); + // Subtract the bias and get the final result. 
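The rewritten ExpandLegalINT_TO_FP threads an explicit chain for the STRICT_ forms. The layout assumed by the recurring `isStrictFPOpcode() ? 1 : 0` operand offset in these hunks is, roughly (a summary plus a hypothetical helper, for orientation only):

    //   SINT_TO_FP        : operands (Src)             results (Value)
    //   STRICT_SINT_TO_FP : operands (Chain, Src)      results (Value, OutChain)
    //   FSUB              : operands (LHS, RHS)        results (Value)
    //   STRICT_FSUB       : operands (Chain, LHS, RHS) results (Value, OutChain)
    static SDValue getFirstDataOperand(SDNode *N) {
      return N->getOperand(N->isStrictFPOpcode() ? 1 : 0); // skip incoming chain
    }
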
+ SDValue Sub; + SDValue Result; + if (Node->isStrictFPOpcode()) { + Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other}, + {Node->getOperand(0), Load, Bias}); + Chain = Sub.getValue(1); + if (DestVT != Sub.getValueType()) { + std::pair<SDValue, SDValue> ResultPair; + ResultPair = + DAG.getStrictFPExtendOrRound(Sub, Chain, dl, DestVT); + Result = ResultPair.first; + Chain = ResultPair.second; + } + else + Result = Sub; + } else { + Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); + Result = DAG.getFPExtendOrRound(Sub, dl, DestVT); + } return Result; } assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); // Code below here assumes !isSigned without checking again. + // FIXME: This can produce slightly incorrect results. See details in + // FIXME: https://reviews.llvm.org/D69275 - SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + SDValue Tmp1; + if (Node->isStrictFPOpcode()) { + Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other }, + { Node->getOperand(0), Op0 }); + } else + Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); @@ -2448,6 +2473,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, FudgeInReg = Handle.getValue(); } + if (Node->isStrictFPOpcode()) { + SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other }, + { Tmp1.getValue(1), Tmp1, FudgeInReg }); + Chain = Result.getValue(1); + return Result; + } + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); } @@ -2456,9 +2488,16 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP /// operation that takes a larger input. -SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, - bool isSigned, - const SDLoc &dl) { +void SelectionDAGLegalize::PromoteLegalINT_TO_FP( + SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) { + bool IsStrict = N->isStrictFPOpcode(); + bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP || + N->getOpcode() == ISD::STRICT_SINT_TO_FP; + EVT DestVT = N->getValueType(0); + SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0); + unsigned UIntOp = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP; + unsigned SIntOp = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP; + // First step, figure out the appropriate *INT_TO_FP operation to use. EVT NewInTy = LegalOp.getValueType(); @@ -2470,15 +2509,16 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, assert(NewInTy.isInteger() && "Ran out of possibilities!"); // If the target supports SINT_TO_FP of this type, use it. - if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { - OpToUse = ISD::SINT_TO_FP; + if (TLI.isOperationLegalOrCustom(SIntOp, NewInTy)) { + OpToUse = SIntOp; break; } - if (isSigned) continue; + if (IsSigned) + continue; // If the target supports UINT_TO_FP of this type, use it. - if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { - OpToUse = ISD::UINT_TO_FP; + if (TLI.isOperationLegalOrCustom(UIntOp, NewInTy)) { + OpToUse = UIntOp; break; } @@ -2487,9 +2527,21 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, // Okay, we found the operation and type to use. 
Zero extend our input to the // desired type then run the operation on it. - return DAG.getNode(OpToUse, dl, DestVT, - DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, - dl, NewInTy, LegalOp)); + if (IsStrict) { + SDValue Res = + DAG.getNode(OpToUse, dl, {DestVT, MVT::Other}, + {N->getOperand(0), + DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp)}); + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + return; + } + + Results.push_back( + DAG.getNode(OpToUse, dl, DestVT, + DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp))); } /// This function is responsible for legalizing a @@ -2497,9 +2549,13 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT /// operation that returns a larger result. -SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, - bool isSigned, - const SDLoc &dl) { +void SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl, + SmallVectorImpl<SDValue> &Results) { + bool IsStrict = N->isStrictFPOpcode(); + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT; + EVT DestVT = N->getValueType(0); + SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0); // First step, figure out the appropriate FP_TO*INT operation to use. EVT NewOutTy = DestVT; @@ -2512,26 +2568,32 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, // A larger signed type can hold all unsigned values of the requested type, // so using FP_TO_SINT is valid - if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { - OpToUse = ISD::FP_TO_SINT; + OpToUse = IsStrict ? ISD::STRICT_FP_TO_SINT : ISD::FP_TO_SINT; + if (TLI.isOperationLegalOrCustom(OpToUse, NewOutTy)) break; - } // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. - if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { - OpToUse = ISD::FP_TO_UINT; + OpToUse = IsStrict ? ISD::STRICT_FP_TO_UINT : ISD::FP_TO_UINT; + if (!IsSigned && TLI.isOperationLegalOrCustom(OpToUse, NewOutTy)) break; - } // Otherwise, try a larger type. } // Okay, we found the operation and type to use. - SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); + SDValue Operation; + if (IsStrict) { + SDVTList VTs = DAG.getVTList(NewOutTy, MVT::Other); + Operation = DAG.getNode(OpToUse, dl, VTs, N->getOperand(0), LegalOp); + } else + Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); // Truncate the result of the extended FP_TO_*INT operation to the desired // size. - return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); + Results.push_back(Trunc); + if (IsStrict) + Results.push_back(Operation.getValue(1)); } /// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts. @@ -2812,12 +2874,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::STRICT_FP_ROUND: - // This expansion does not honor the "strict" properties anyway, - // so prefer falling back to the non-strict operation if legal. + // When strict mode is enforced we can't do expansion because it + // does not honor the "strict" properties. Only libcall is allowed. 
+ if (TLI.isStrictFPEnabled()) + break; + // We might as well mutate to FP_ROUND when FP_ROUND operation is legal + // since this operation is more efficient than stack operation. if (TLI.getStrictFPOperationAction(Node->getOpcode(), Node->getValueType(0)) == TargetLowering::Legal) break; + // We fall back to use stack operation when the FP_ROUND operation + // isn't available. Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2832,12 +2900,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::STRICT_FP_EXTEND: - // This expansion does not honor the "strict" properties anyway, - // so prefer falling back to the non-strict operation if legal. + // When strict mode is enforced we can't do expansion because it + // does not honor the "strict" properties. Only libcall is allowed. + if (TLI.isStrictFPEnabled()) + break; + // We might as well mutate to FP_EXTEND when FP_EXTEND operation is legal + // since this operation is more efficient than stack operation. if (TLI.getStrictFPOperationAction(Node->getOpcode(), Node->getValueType(0)) == TargetLowering::Legal) break; + // We fall back to use stack operation when the FP_EXTEND operation + // isn't available. Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getOperand(1).getValueType(), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2883,15 +2957,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::UINT_TO_FP: - if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { + case ISD::STRICT_UINT_TO_FP: + if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) { Results.push_back(Tmp1); + if (Node->isStrictFPOpcode()) + Results.push_back(Tmp2); break; } LLVM_FALLTHROUGH; case ISD::SINT_TO_FP: - Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, - Node->getOperand(0), Node->getValueType(0), dl); + case ISD::STRICT_SINT_TO_FP: + Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2); Results.push_back(Tmp1); + if (Node->isStrictFPOpcode()) + Results.push_back(Tmp2); break; case ISD::FP_TO_SINT: if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) @@ -3340,6 +3419,24 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::UMULFIXSAT: Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: + if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node), + Node->getOperand(0), + Node->getOperand(1), + Node->getConstantOperandVal(2), + DAG)) { + Results.push_back(V); + break; + } + // FIXME: We might want to retry here with a wider type if we fail, if that + // type is legal. + // FIXME: Technically, so long as we only have sdivfixes where BW+Scale is + // <= 128 (which is the case for all of the default Embedded-C types), + // we will only get here with types and scales that we could always expand + // if we were allowed to generate libcalls to division functions of illegal + // type. But we cannot do that. 
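For context, a fixed-point division with scale S divides two Q-format values, i.e. it computes (LHS << S) / RHS. Ignoring the precise rounding and saturation rules, the widened expansion requested above behaves like this scalar sketch (it assumes the doubled-width type exists, which is exactly what can fail for the widest legal types and leads to the unreachable below):

    int32_t sdivfix_sketch(int32_t LHS, int32_t RHS, unsigned Scale) {
      int64_t Wide = (int64_t)LHS << Scale; // pre-scale in the wider type
      return (int32_t)(Wide / RHS);         // real expansion also fixes up rounding
    }
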
+ llvm_unreachable("Cannot expand DIVFIX!"); case ISD::ADDCARRY: case ISD::SUBCARRY: { SDValue LHS = Node->getOperand(0); @@ -3503,12 +3600,19 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } Results.push_back(Tmp1); break; - case ISD::SETCC: { - Tmp1 = Node->getOperand(0); - Tmp2 = Node->getOperand(1); - Tmp3 = Node->getOperand(2); - bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, - Tmp3, NeedInvert, dl); + case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + bool IsStrict = Node->getOpcode() != ISD::SETCC; + bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS; + SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); + unsigned Offset = IsStrict ? 1 : 0; + Tmp1 = Node->getOperand(0 + Offset); + Tmp2 = Node->getOperand(1 + Offset); + Tmp3 = Node->getOperand(2 + Offset); + bool Legalized = + LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, + NeedInvert, dl, Chain, IsSignaling); if (Legalized) { // If we expanded the SETCC by swapping LHS and RHS, or by inverting the @@ -3523,9 +3627,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); Results.push_back(Tmp1); + if (IsStrict) + Results.push_back(Chain); + break; } + // FIXME: It seems Legalized is false iff CCCode is Legal. I don't + // understand if this code is useful for strict nodes. + assert(!IsStrict && "Don't know how to expand for strict nodes."); + // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); @@ -3548,11 +3659,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::SELECT_CC: { + // TODO: need to add STRICT_SELECT_CC and STRICT_SELECT_CCS Tmp1 = Node->getOperand(0); // LHS Tmp2 = Node->getOperand(1); // RHS Tmp3 = Node->getOperand(2); // True Tmp4 = Node->getOperand(3); // False EVT VT = Node->getValueType(0); + SDValue Chain; SDValue CC = Node->getOperand(4); ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); @@ -3574,8 +3687,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Try to legalize by inverting the condition. This is for targets that // might support an ordered version of a condition, but not the unordered // version (or vice versa). 
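The inversion tried here flips both the comparison and its orderedness, which is what lets a target that supports only one of the two forms legalize the other; two concrete cases, assuming the EVT-taking getSetCCInverse overload this patch switches to (sketch only):

    assert(ISD::getSetCCInverse(ISD::SETOLT, EVT(MVT::f64)) == ISD::SETUGE);
    assert(ISD::getSetCCInverse(ISD::SETUEQ, EVT(MVT::f64)) == ISD::SETONE);
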
- ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, - Tmp1.getValueType().isInteger()); + ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType()); if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false Legalized = true; @@ -3595,9 +3707,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } if (!Legalized) { - Legalized = LegalizeSetCCCondCode( - getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, - dl); + Legalized = LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, CC, NeedInvert, dl, Chain); assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); @@ -3623,13 +3734,16 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::BR_CC: { + // TODO: need to add STRICT_BR_CC and STRICT_BR_CCS + SDValue Chain; Tmp1 = Node->getOperand(0); // Chain Tmp2 = Node->getOperand(2); // LHS Tmp3 = Node->getOperand(3); // RHS Tmp4 = Node->getOperand(1); // CC - bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( - Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); + bool Legalized = + LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()), Tmp2, + Tmp3, Tmp4, NeedInvert, dl, Chain); (void)Legalized; assert(Legalized && "Can't legalize BR_CC with legal condition!"); @@ -3677,7 +3791,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } SDValue Result = DAG.getBuildVector(Node->getValueType(0), dl, Scalars); - ReplaceNode(SDValue(Node, 0), Result); + Results.push_back(Result); break; } case ISD::VECREDUCE_FADD: @@ -3705,10 +3819,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! - break; + // Return true so that we don't call ConvertNodeToLibcall which also won't + // do anything. + return true; } - if (Results.empty() && Node->isStrictFPOpcode()) { + if (!TLI.isStrictFPEnabled() && Results.empty() && Node->isStrictFPOpcode()) { // FIXME: We were asked to expand a strict floating-point operation, // but there is currently no expansion implemented that would preserve // the "strict" properties. 
For now, we just fall back to the non-strict @@ -3793,7 +3909,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::Libcall LC = RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Ops, CallOptions, + SDLoc(Node), + Node->getOperand(0)); Results.push_back(Tmp.first); Results.push_back(Tmp.second); break; @@ -3815,38 +3937,38 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { } case ISD::FMINNUM: case ISD::STRICT_FMINNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128, Results); break; case ISD::FMAXNUM: case ISD::STRICT_FMAXNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128, Results); break; case ISD::FSQRT: case ISD::STRICT_FSQRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128, Results); break; case ISD::FCBRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, - RTLIB::CBRT_F80, RTLIB::CBRT_F128, - RTLIB::CBRT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64, + RTLIB::CBRT_F80, RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128, Results); break; case ISD::FSIN: case ISD::STRICT_FSIN: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128)); + ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128, Results); break; case ISD::FCOS: case ISD::STRICT_FCOS: - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128)); + ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128, Results); break; case ISD::FSINCOS: // Expand into sincos libcall. 
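ExpandChainLibCall could be deleted because TLI.makeLibCall now accepts an input chain; the shape used throughout ConvertNodeToLibcall for chained (strict or atomic) nodes is roughly the following condensed sketch of the calls above:

    TargetLowering::MakeLibCallOptions CallOptions;
    SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); // drop the chain
    std::pair<SDValue, SDValue> Tmp =
        TLI.makeLibCall(DAG, LC, Node->getValueType(0), Ops, CallOptions,
                        SDLoc(Node), /*Chain=*/Node->getOperand(0));
    Results.push_back(Tmp.first);  // the call's value result
    Results.push_back(Tmp.second); // the output chain
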
@@ -3855,181 +3977,204 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::FLOG: case ISD::STRICT_FLOG: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, - RTLIB::LOG_FINITE_F64, - RTLIB::LOG_FINITE_F80, - RTLIB::LOG_FINITE_F128, - RTLIB::LOG_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, + RTLIB::LOG_FINITE_F64, + RTLIB::LOG_FINITE_F80, + RTLIB::LOG_FINITE_F128, + RTLIB::LOG_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128, Results); break; case ISD::FLOG2: case ISD::STRICT_FLOG2: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, - RTLIB::LOG2_FINITE_F64, - RTLIB::LOG2_FINITE_F80, - RTLIB::LOG2_FINITE_F128, - RTLIB::LOG2_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, + RTLIB::LOG2_FINITE_F64, + RTLIB::LOG2_FINITE_F80, + RTLIB::LOG2_FINITE_F128, + RTLIB::LOG2_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128, Results); break; case ISD::FLOG10: case ISD::STRICT_FLOG10: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, - RTLIB::LOG10_FINITE_F64, - RTLIB::LOG10_FINITE_F80, - RTLIB::LOG10_FINITE_F128, - RTLIB::LOG10_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, + RTLIB::LOG10_FINITE_F64, + RTLIB::LOG10_FINITE_F80, + RTLIB::LOG10_FINITE_F128, + RTLIB::LOG10_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); + ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128, Results); break; case ISD::FEXP: case ISD::STRICT_FEXP: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, - RTLIB::EXP_FINITE_F64, - RTLIB::EXP_FINITE_F80, - RTLIB::EXP_FINITE_F128, - RTLIB::EXP_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, + RTLIB::EXP_FINITE_F64, + RTLIB::EXP_FINITE_F80, + RTLIB::EXP_FINITE_F128, + RTLIB::EXP_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128, Results); break; case ISD::FEXP2: case ISD::STRICT_FEXP2: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, - RTLIB::EXP2_FINITE_F64, - RTLIB::EXP2_FINITE_F80, - RTLIB::EXP2_FINITE_F128, - RTLIB::EXP2_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, + RTLIB::EXP2_FINITE_F64, + RTLIB::EXP2_FINITE_F80, + RTLIB::EXP2_FINITE_F128, + RTLIB::EXP2_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, 
RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); + ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128, Results); break; case ISD::FTRUNC: case ISD::STRICT_FTRUNC: - Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128)); + ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128, Results); break; case ISD::FFLOOR: case ISD::STRICT_FFLOOR: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128, Results); break; case ISD::FCEIL: case ISD::STRICT_FCEIL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128)); + ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128, Results); break; case ISD::FRINT: case ISD::STRICT_FRINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128, Results); break; case ISD::FNEARBYINT: case ISD::STRICT_FNEARBYINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128)); + ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128, Results); break; case ISD::FROUND: case ISD::STRICT_FROUND: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128)); + ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128, Results); break; case ISD::FPOWI: - case ISD::STRICT_FPOWI: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128)); + case ISD::STRICT_FPOWI: { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::POWI_F32; break; + case MVT::f64: LC = RTLIB::POWI_F64; break; + case MVT::f80: LC = RTLIB::POWI_F80; break; + case MVT::f128: LC = RTLIB::POWI_F128; break; + case MVT::ppcf128: LC = RTLIB::POWI_PPCF128; break; + } + if (!TLI.getLibcallName(LC)) { + // Some targets don't have a powi libcall; use pow instead. 
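The powi fallback that follows converts the integer exponent and reuses pow; numerically it relies on pow(x, (double)n) matching powi(x, n) for the integer exponents involved, with the usual libm accuracy caveats (standalone sketch, not from the patch):

    #include <cmath>
    double powi_via_pow(double X, int N) {
      return std::pow(X, (double)N); // what the emitted SINT_TO_FP + FPOW computes
    }
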
+ SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), + Node->getValueType(0), + Node->getOperand(1)); + Results.push_back(DAG.getNode(ISD::FPOW, SDLoc(Node), + Node->getValueType(0), Node->getOperand(0), + Exponent)); + break; + } + ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128, Results); break; + } case ISD::FPOW: case ISD::STRICT_FPOW: if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite)) - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, - RTLIB::POW_FINITE_F64, - RTLIB::POW_FINITE_F80, - RTLIB::POW_FINITE_F128, - RTLIB::POW_FINITE_PPCF128)); + ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, + RTLIB::POW_FINITE_F64, + RTLIB::POW_FINITE_F80, + RTLIB::POW_FINITE_F128, + RTLIB::POW_FINITE_PPCF128, Results); else - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); + ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128, Results); break; case ISD::LROUND: case ISD::STRICT_LROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, - RTLIB::LROUND_F64, RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128, Results); break; case ISD::LLROUND: case ISD::STRICT_LLROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128, Results); break; case ISD::LRINT: case ISD::STRICT_LRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, - RTLIB::LRINT_F64, RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128, Results); break; case ISD::LLRINT: case ISD::STRICT_LLRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128)); + ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128, Results); break; case ISD::FDIV: - Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_F128, - RTLIB::DIV_PPCF128)); + case ISD::STRICT_FDIV: + ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128, Results); break; case ISD::FREM: case ISD::STRICT_FREM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128)); + ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128, Results); break; case ISD::FMA: case ISD::STRICT_FMA: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, - RTLIB::FMA_F80, RTLIB::FMA_F128, - RTLIB::FMA_PPCF128)); + ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128, Results); break; case ISD::FADD: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128)); + case ISD::STRICT_FADD: 
+ ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128, Results); break; case ISD::FMUL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, - RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128)); + case ISD::STRICT_FMUL: + ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128, Results); break; case ISD::FP16_TO_FP: if (Node->getValueType(0) == MVT::f32) { @@ -4044,9 +4189,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; } case ISD::FSUB: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, - RTLIB::SUB_F80, RTLIB::SUB_F128, - RTLIB::SUB_PPCF128)); + case ISD::STRICT_FSUB: + ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128, Results); break; case ISD::SREM: Results.push_back(ExpandIntLibCall(Node, true, @@ -4129,6 +4275,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { OVT = Node->getOperand(0).getSimpleValueType(); } + if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP || + Node->getOpcode() == ISD::STRICT_SINT_TO_FP) + OVT = Node->getOperand(1).getSimpleValueType(); if (Node->getOpcode() == ISD::BR_CC) OVT = Node->getOperand(2).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); @@ -4177,16 +4326,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; } case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: - Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), - Node->getOpcode() == ISD::FP_TO_SINT, dl); - Results.push_back(Tmp1); + case ISD::STRICT_FP_TO_SINT: + PromoteLegalFP_TO_INT(Node, dl, Results); break; case ISD::UINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: - Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), - Node->getOpcode() == ISD::SINT_TO_FP, dl); - Results.push_back(Tmp1); + case ISD::STRICT_SINT_TO_FP: + PromoteLegalINT_TO_FP(Node, dl, Results); break; case ISD::VAARG: { SDValue Chain = Node->getOperand(0); // Get the chain. 
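PromoteLegalFP_TO_INT above converts in a wider integer type and truncates; in scalar terms the idea is simply the following (illustrative only, the exact widths depend on which operations the target marks legal):

    // If i16 <- f32 isn't legal but i32 <- f32 is: convert wide, then truncate.
    // Every value representable in the narrow result also fits in the wider
    // one, so the truncate is lossless for in-range inputs.
    int16_t fp_to_i16_via_i32(float X) {
      int32_t Wide = (int32_t)X; // the legal, wider FP_TO_SINT
      return (int16_t)Wide;      // ISD::TRUNCATE back to the requested width
    }
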
@@ -4358,6 +4507,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FREM: + case ISD::STRICT_FPOW: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(2)}); + Tmp3 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Tmp1.getValue(1), + Tmp2.getValue(1)); + Tmp1 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp3, Tmp1, Tmp2}); + Tmp1 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp1); + Results.push_back(Tmp1.getValue(1)); + break; case ISD::FMA: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); @@ -4404,6 +4568,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, DAG.getIntPtrConstant(0, dl))); break; + case ISD::STRICT_FFLOOR: + case ISD::STRICT_FCEIL: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FEXP: + Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, {NVT, MVT::Other}, + {Tmp1.getValue(1), Tmp1}); + Tmp3 = DAG.getNode(ISD::STRICT_FP_ROUND, dl, {OVT, MVT::Other}, + {Tmp2.getValue(1), Tmp2, DAG.getIntPtrConstant(0, dl)}); + Results.push_back(Tmp3); + Results.push_back(Tmp3.getValue(1)); + break; case ISD::BUILD_VECTOR: { MVT EltVT = OVT.getVectorElementType(); MVT NewEltVT = NVT.getVectorElementType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 72d052473f11..f191160dee4f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -65,39 +65,68 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::STRICT_FMINNUM: case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; + case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; + case ISD::STRICT_FADD: case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCBRT: R = SoftenFloatRes_FCBRT(N); break; + case ISD::STRICT_FCEIL: case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::STRICT_FCOS: case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::STRICT_FDIV: case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::STRICT_FEXP: case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::STRICT_FEXP2: case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::STRICT_FFLOOR: case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::STRICT_FLOG: case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::STRICT_FLOG2: case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::STRICT_FLOG10: case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::STRICT_FMA: case ISD::FMA: R = SoftenFloatRes_FMA(N); break; + case ISD::STRICT_FMUL: case ISD::FMUL: R = 
SoftenFloatRes_FMUL(N); break; + case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; + case ISD::STRICT_FPOW: case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::STRICT_FPOWI: case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::STRICT_FRINT: case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::STRICT_FROUND: case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; + case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::STRICT_FSQRT: case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::STRICT_FSUB: case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::STRICT_FTRUNC: case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; @@ -111,6 +140,46 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } } +SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) { + bool IsStrict = N->isStrictFPOpcode(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getNumOperands() == (1 + Offset) && + "Unexpected number of operands!"); + SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset)); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpVT = N->getOperand(0 + Offset).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) { + bool IsStrict = N->isStrictFPOpcode(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getNumOperands() == (2 + Offset) && + "Unexpected number of operands!"); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + GetSoftenedFloat(N->getOperand(1 + Offset)) }; + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -174,69 +243,48 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMIN_F32, - RTLIB::FMIN_F64, - RTLIB::FMIN_F80, - RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, + RTLIB::FMIN_F64, + RTLIB::FMIN_F80, + RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMAX_F32, - RTLIB::FMAX_F64, - RTLIB::FMAX_F80, - RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, + RTLIB::FMAX_F64, + RTLIB::FMAX_F80, + RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, - RTLIB::ADD_F64, - RTLIB::ADD_F80, - RTLIB::ADD_F128, - RTLIB::ADD_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCBRT(SDNode *N) { + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CBRT_F32, + RTLIB::CBRT_F64, + RTLIB::CBRT_F80, + RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - 
TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, - RTLIB::CEIL_F64, - RTLIB::CEIL_F80, - RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { @@ -288,231 +336,170 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, - RTLIB::COS_F64, - RTLIB::COS_F80, - RTLIB::COS_F128, - RTLIB::COS_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_F128, + RTLIB::COS_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_F128, - RTLIB::DIV_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, - RTLIB::EXP_F64, - RTLIB::EXP_F80, - RTLIB::EXP_F128, - RTLIB::EXP_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, - RTLIB::EXP2_F64, - RTLIB::EXP2_F80, - RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return 
SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, - RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, - RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, - RTLIB::LOG_F64, - RTLIB::LOG_F80, - RTLIB::LOG_F128, - RTLIB::LOG_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, - RTLIB::LOG2_F64, - RTLIB::LOG2_F80, - RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, - RTLIB::LOG10_F64, - RTLIB::LOG10_F80, - RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)), - GetSoftenedFloat(N->getOperand(2)) }; + unsigned Offset = IsStrict ? 
1 : 0; + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + GetSoftenedFloat(N->getOperand(1 + Offset)), + GetSoftenedFloat(N->getOperand(2 + Offset)) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[3] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType(), - N->getOperand(2).getValueType() }; + EVT OpsVT[3] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType(), + N->getOperand(2 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_F128, - RTLIB::FMA_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, + GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + NVT, Ops, CallOptions, SDLoc(N), Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_F128, - RTLIB::MUL_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - EVT FloatVT = N->getValueType(0); - if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { - // Expand Y = FNEG(X) -> Y = X ^ sign mask - APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); - return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), - DAG.getConstant(SignMask, dl, NVT)); - } - - // Expand Y = FNEG(X) -> Y = SUB -0.0, X - SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), - GetSoftenedFloat(N->getOperand(0)) }; - TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - 
RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - NVT, Ops, CallOptions, dl).first; + // Expand Y = FNEG(X) -> Y = X ^ sign mask + APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); + return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)), + DAG.getConstant(SignMask, dl, NVT)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = N->getOperand(0); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); - // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's - // entirely possible for both f16 and f32 to be legal, so use the fully - // hard-float FP_EXTEND rather than FP16_TO_FP. - if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { - Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); - if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat) - AddToWorklist(Op.getNode()); - } + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { Op = GetPromotedFloat(Op); // If the promotion did the FP_EXTEND to the destination type for us, // there's nothing left to do here. - if (Op.getValueType() == N->getValueType(0)) { + if (Op.getValueType() == N->getValueType(0)) return BitConvertToInteger(Op); + } + + // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's + // entirely possible for both f16 and f32 to be legal, so use the fully + // hard-float FP_EXTEND rather than FP16_TO_FP. + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { + if (IsStrict) { + Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), + { MVT::f32, MVT::Other }, { Chain, Op }); + Chain = Op.getValue(1); + } else { + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); } } RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -535,167 +522,127 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = N->getOperand(0); - if (N->getValueType(0) == MVT::f16) { - // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a - // storage-only type get a chance to select things. - return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op); - } - + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, - RTLIB::POW_F64, - RTLIB::POW_F80, - RTLIB::POW_F128, - RTLIB::POW_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_F128, + RTLIB::POW_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { - assert(N->getOperand(1).getValueType() == MVT::i32 && + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + assert(N->getOperand(1 + Offset).getValueType() == MVT::i32 && "Unsupported power type!"); + RTLIB::Libcall LC = GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_F128, + RTLIB::POWI_PPCF128); + if (!TLI.getLibcallName(LC)) { + // Some targets don't have a powi libcall; use pow instead. + // FIXME: Implement this if some target needs it. + DAG.getContext()->emitError("Don't know how to soften fpowi to fpow"); + return DAG.getUNDEF(N->getValueType(0)); + } + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0 + Offset)), + N->getOperand(1 + Offset) }; + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; + EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(), + N->getOperand(1 + Offset).getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, - RTLIB::POWI_F64, - RTLIB::POWI_F80, - RTLIB::POWI_F128, - RTLIB::POWI_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, - RTLIB::REM_F64, - RTLIB::REM_F80, - RTLIB::REM_F128, - RTLIB::REM_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_F128, + RTLIB::REM_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, - RTLIB::RINT_F64, - RTLIB::RINT_F80, - RTLIB::RINT_F128, - RTLIB::RINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, - RTLIB::SIN_F64, - RTLIB::SIN_F80, - RTLIB::SIN_F128, - 
RTLIB::SIN_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, - RTLIB::SQRT_F64, - RTLIB::SQRT_F80, - RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), - GetSoftenedFloat(N->getOperand(1)) }; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[2] = { N->getOperand(0).getValueType(), - N->getOperand(1).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - NVT, Ops, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - if (N->getValueType(0) == MVT::f16) - return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); - - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, - RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, - RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { @@ -715,8 +662,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - if (N != NewL.getValue(1).getNode()) - ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -771,8 +717,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { - bool Signed = N->getOpcode() == ISD::SINT_TO_FP; - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + bool Signed = N->getOpcode() == ISD::SINT_TO_FP || + N->getOpcode() == ISD::STRICT_SINT_TO_FP; + EVT SVT = N->getOperand(IsStrict ? 
1 : 0).getValueType(); EVT RVT = N->getValueType(0); EVT NVT = EVT(); SDLoc dl(N); @@ -790,16 +738,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, - NVT, N->getOperand(0)); + NVT, N->getOperand(IsStrict ? 1 : 0)); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(Signed); - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, - TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - Op, CallOptions, dl).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + Op, CallOptions, dl, Chain); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + return Tmp.first; } @@ -822,18 +774,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; - case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::STRICT_LROUND: case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break; + case ISD::STRICT_LLROUND: case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; + case ISD::STRICT_LRINT: case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; + case ISD::STRICT_LLRINT: case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; } // If the result is null, the sub-method took care of registering results etc. @@ -845,7 +806,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand promotion"); + "Invalid operand softening"); ReplaceValueWith(SDValue(N, 0), Res); return false; @@ -857,42 +818,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { - // If we get here, the result must be legal but the source illegal. 
- EVT SVT = N->getOperand(0).getValueType(); - EVT RVT = N->getValueType(0); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (SVT == MVT::f16) - return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op); - - RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; -} - - SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { // We actually deal with the partially-softened FP_TO_FP16 node too, which // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. - assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16); + assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 || + N->getOpcode() == ISD::STRICT_FP_ROUND); - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT; RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + Op = GetSoftenedFloat(Op); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) { + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); + } + return Tmp.first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -920,8 +873,12 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { - bool Signed = N->getOpcode() == ISD::FP_TO_SINT; - EVT SVT = N->getOperand(0).getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + bool Signed = N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT; + + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); EVT NVT = EVT(); SDLoc dl(N); @@ -937,18 +894,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { NVT = (MVT::SimpleValueType)IntVT; // The type needs to big enough to hold the result. if (NVT.bitsGE(RVT)) - LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT); + LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT); } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); + Op = GetSoftenedFloat(Op); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first; + CallOptions.setTypeListBeforeSoften(SVT, RVT, true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, dl, Chain); // Truncate the result if the libcall returns a larger type. - return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); + SDValue Res = DAG.getNode(ISD::TRUNCATE, dl, RVT, Tmp.first); + + if (!IsStrict) + return Res; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -976,26 +941,39 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { - SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); - ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); + SDValue Op1 = N->getOperand(IsStrict ? 2 : 1); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + ISD::CondCode CCCode = + cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get(); + + EVT VT = Op0.getValueType(); + SDValue NewLHS = GetSoftenedFloat(Op0); + SDValue NewRHS = GetSoftenedFloat(Op1); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), Op0, Op1, + Chain, N->getOpcode() == ISD::STRICT_FSETCCS); - EVT VT = NewLHS.getValueType(); - NewLHS = GetSoftenedFloat(NewLHS); - NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), - N->getOperand(0), N->getOperand(1)); - - // If softenSetCCOperands returned a scalar, use it. - if (!NewRHS.getNode()) { - assert(NewLHS.getValueType() == N->getValueType(0) && - "Unexpected setcc expansion!"); - return NewLHS; + // Update N to have the operands specified. + if (NewRHS.getNode()) { + if (IsStrict) + NewLHS = DAG.getNode(ISD::SETCC, SDLoc(N), N->getValueType(0), NewLHS, + NewRHS, DAG.getCondCode(CCCode)); + else + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } - // Otherwise, update N to have the operands specified. - return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, - DAG.getCondCode(CCCode)), - 0); + // Otherwise, softenSetCCOperands returned a scalar, use it. 
+ assert((NewRHS.getNode() || NewLHS.getValueType() == N->getValueType(0)) && + "Unexpected setcc expansion!"); + + if (IsStrict) { + ReplaceValueWith(SDValue(N, 0), NewLHS); + ReplaceValueWith(SDValue(N, 1), Chain); + return SDValue(); + } + return NewLHS; } SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { @@ -1016,72 +994,99 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { ST->getMemOperand()); } -SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + SDLoc dl(N); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LROUND_F32, - RTLIB::LROUND_F64, - RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + EVT LVT = LHS.getValueType(); + EVT ILVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits()); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RSize - LSize; + if (SizeDiff > 0) { + RHS = + DAG.getNode(ISD::SRL, dl, RVT, RHS, + DAG.getConstant(SizeDiff, dl, + TLI.getShiftAmountTy(RHS.getValueType(), + DAG.getDataLayout()))); + RHS = DAG.getNode(ISD::TRUNCATE, dl, ILVT, RHS); + } else if (SizeDiff < 0) { + RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, RHS); + RHS = + DAG.getNode(ISD::SHL, dl, ILVT, RHS, + DAG.getConstant(-SizeDiff, dl, + TLI.getShiftAmountTy(RHS.getValueType(), + DAG.getDataLayout()))); + } + + RHS = DAG.getBitcast(LVT, RHS); + return DAG.getNode(ISD::FCOPYSIGN, dl, LVT, LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Op = GetSoftenedFloat(N->getOperand(0 + Offset)); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, - RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; + EVT OpVT = N->getOperand(0 + Offset).getValueType(); + CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op, + CallOptions, SDLoc(N), + Chain); + if (IsStrict) { + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); + } + + return Tmp.first; } -SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LROUND_F32, + RTLIB::LROUND_F64, + RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); +} - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LRINT_F32, - RTLIB::LRINT_F64, - RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; +SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, + RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); } -SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); +SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LRINT_F32, + RTLIB::LRINT_F64, + RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); +} - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - TargetLowering::MakeLibCallOptions CallOptions; - EVT OpsVT[1] = { N->getOperand(0).getValueType() }; - CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); - return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, - RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, - RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128), - NVT, Op, CallOptions, SDLoc(N)).first; +SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { + EVT OpVT = N->getOperand(N->isStrictFPOpcode() ? 
1 : 0).getValueType(); + return SoftenFloatOp_Unary(N, GetFPLibCall(OpVT, + RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, + RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); } //===----------------------------------------------------------------------===// @@ -1122,35 +1127,61 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::STRICT_FMINNUM: case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break; + case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; + case ISD::STRICT_FADD: case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + case ISD::FCBRT: ExpandFloatRes_FCBRT(N, Lo, Hi); break; + case ISD::STRICT_FCEIL: case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::STRICT_FCOS: case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::STRICT_FDIV: case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; + case ISD::STRICT_FEXP: case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; + case ISD::STRICT_FEXP2: case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; + case ISD::STRICT_FFLOOR: case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; + case ISD::STRICT_FLOG: case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; + case ISD::STRICT_FLOG2: case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; + case ISD::STRICT_FLOG10: case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::STRICT_FMA: case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; + case ISD::STRICT_FMUL: case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; + case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; + case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break; + case ISD::STRICT_FPOW: case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; + case ISD::STRICT_FPOWI: case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::STRICT_FRINT: case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::STRICT_FROUND: case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; + case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::STRICT_FSQRT: case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; + case ISD::STRICT_FSUB: case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::STRICT_FTRUNC: case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + case ISD::STRICT_FREM: case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; } @@ -1174,6 +1205,36 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, dl, NVT); } +void DAGTypeLegalizer::ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi) { + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Op = N->getOperand(0 + Offset); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0), + Op, CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi) { + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Ops[] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, N->getValueType(0), + Ops, CallOptions, SDLoc(N), + Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && @@ -1190,181 +1251,159 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCBRT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::CBRT_F32, + RTLIB::CBRT_F64, RTLIB::CBRT_F80, + RTLIB::CBRT_F128, + RTLIB::CBRT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::COPYSIGN_F32, - RTLIB::COPYSIGN_F64, - RTLIB::COPYSIGN_F80, - RTLIB::COPYSIGN_F128, - RTLIB::COPYSIGN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, 
GetFPLibCall(N->getValueType(0), + RTLIB::COPYSIGN_F32, + RTLIB::COPYSIGN_F64, + RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_F128, + RTLIB::COPYSIGN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_F128, - RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, SDValue &Lo, 
SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + bool IsStrict = N->isStrictFPOpcode(); + unsigned Offset = IsStrict ? 1 : 0; + SDValue Ops[3] = { N->getOperand(0 + Offset), N->getOperand(1 + Offset), + N->getOperand(2 + Offset) }; + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + SDLoc(N), Chain); + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); + GetPairElements(Tmp.first, Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + RTLIB::MUL_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, @@ -1379,106 +1418,105 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); + bool IsStrict = N->isStrictFPOpcode(); + + SDValue Chain; + if (IsStrict) { + // If the expanded type is the same as the input type, just bypass the node. + if (NVT == N->getOperand(1).getValueType()) { + Hi = N->getOperand(1); + Chain = N->getOperand(0); + } else { + // Other we need to extend. 
+ Hi = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + Chain = Hi.getValue(1); + } + } else { + Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); + } + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(NVT.getSizeInBits(), 0)), dl, NVT); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Chain); } void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), Lo, Hi); } 
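The hunks above fold each per-opcode libcall expansion into shared ExpandFloatRes_Unary / ExpandFloatRes_Binary (and the earlier SoftenFloatRes_Unary / SoftenFloatRes_Binary) helpers that also cover the strict variants: a strict FP node carries its chain as operand 0, so the value operands start at an offset of 1, and the chain returned by the libcall has to be wired back into result value 1. The standalone sketch below models only that offset-and-chain bookkeeping; Node, Value and makeLibCall are hypothetical stand-ins for illustration, not LLVM API.

    #include <cassert>
    #include <utility>
    #include <vector>

    // Hypothetical stand-ins for SDNode/SDValue, only for illustration.
    struct Value { int id = -1; bool valid() const { return id >= 0; } };
    struct Node {
      bool strict;                 // models N->isStrictFPOpcode()
      std::vector<Value> operands; // operand 0 is the chain when strict
      Value result, chainResult;   // model result values 0 and 1
    };

    // Models a libcall emitter returning {call result, updated chain}.
    static std::pair<Value, Value> makeLibCall(Value op, Value chain) {
      return { Value{100 + op.id},
               chain.valid() ? Value{200 + chain.id} : Value{} };
    }

    // The shared unary helper: skip the chain operand when present,
    // and thread the returned chain back into the node's chain result.
    static Value expandUnary(Node &n) {
      unsigned offset = n.strict ? 1 : 0;
      Value op = n.operands[0 + offset];
      Value chain = n.strict ? n.operands[0] : Value{};
      auto [res, outChain] = makeLibCall(op, chain);
      if (n.strict)
        n.chainResult = outChain; // models ReplaceValueWith(SDValue(N, 1), ...)
      return res;
    }

    int main() {
      Node strictNode{true, {{0 /*chain*/}, {7 /*fp operand*/}}, {}, {}};
      Value r = expandUnary(strictNode);
      assert(r.id == 107 && strictNode.chainResult.id == 200);
      return 0;
    }

With this shape, a non-strict node simply passes an empty chain and leaves result 1 untouched, which is why the patch can route both FADD and STRICT_FADD (and the other opcodes) through the same helper.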
void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - TargetLowering::MakeLibCallOptions CallOptions; - SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_F128, - RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, CallOptions, - SDLoc(N)).first; - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128), - N, false); - GetPairElements(Call, Lo, Hi); + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, @@ -1619,8 +1657,11 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; + case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break; case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break; @@ -1709,34 +1750,72 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { - assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + bool IsStrict = N->isStrictFPOpcode(); + assert(N->getOperand(IsStrict ? 1 : 0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); SDValue Lo, Hi; - GetExpandedFloat(N->getOperand(0), Lo, Hi); - // Round it the rest of the way (e.g. to f32) if needed. - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), - N->getValueType(0), Hi, N->getOperand(1)); + GetExpandedFloat(N->getOperand(IsStrict ? 1 : 0), Lo, Hi); + + if (!IsStrict) + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0), Hi, N->getOperand(1)); + + // Eliminate the node if the input float type is the same as the output float + // type. + if (Hi.getValueType() == N->getValueType(0)) { + // Connect the output chain to the input chain, unlinking the node. + ReplaceValueWith(SDValue(N, 1), N->getOperand(0)); + ReplaceValueWith(SDValue(N, 0), Hi); + return SDValue(); + } + + SDValue Expansion = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), + {N->getValueType(0), MVT::Other}, + {N->getOperand(0), Hi, N->getOperand(2)}); + ReplaceValueWith(SDValue(N, 1), Expansion.getValue(1)); + ReplaceValueWith(SDValue(N, 0), Expansion); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, dl, Chain); + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); TargetLowering::MakeLibCallOptions CallOptions; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), - CallOptions, dl).first; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, + CallOptions, dl, Chain); + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { @@ -1800,7 +1879,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, @@ -1813,7 +1892,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLROUND_F32, @@ -1826,7 +1905,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, @@ -1839,7 +1918,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { EVT RVT = N->getValueType(0); - EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + EVT RetVT = N->getOperand(0).getValueType(); TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, @@ -1963,12 +2042,11 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) { // code. 
SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) { EVT VT = N->getValueType(0); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Op0 = GetPromotedFloat(N->getOperand(0)); SDValue Op1 = GetPromotedFloat(N->getOperand(1)); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); - return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode); + return DAG.getSetCC(SDLoc(N), VT, Op0, Op1, CCCode); } @@ -2026,6 +2104,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { // Unary FP Operations case ISD::FABS: + case ISD::FCBRT: case ISD::FCEIL: case ISD::FCOS: case ISD::FEXP: @@ -2304,7 +2383,6 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { EVT VT = N->getValueType(0); - EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); AtomicSDNode *AM = cast<AtomicSDNode>(N); SDLoc SL(N); @@ -2318,13 +2396,19 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { { AM->getChain(), AM->getBasePtr(), CastVal }, AM->getMemOperand()); - SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT, - NewAtomic); + SDValue Result = NewAtomic; + + if (getTypeAction(VT) == TargetLowering::TypePromoteFloat) { + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + Result = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT, + NewAtomic); + } + // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1)); - return ResultCast; + return Result; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index d5c1b539adbd..0e46f8d68f83 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -75,6 +75,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; @@ -158,6 +160,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break; + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; case ISD::ATOMIC_LOAD: @@ -337,8 +342,21 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { // The input is widened to the same size. Convert to the widened value. // Make sure that the outgoing value is not a vector, because this would // make us bitcast between two vectors which are legalized in different ways. - if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) - return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) { + SDValue Res = + DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); + + // For big endian targets we need to shift the casted value or the + // interesting bits will end up at the wrong place. 
+ if (DAG.getDataLayout().isBigEndian()) { + unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits(); + EVT ShiftAmtTy = TLI.getShiftAmountTy(NOutVT, DAG.getDataLayout()); + assert(ShiftAmt < NOutVT.getSizeInBits() && "Too large shift amount!"); + Res = DAG.getNode(ISD::SRL, dl, NOutVT, Res, + DAG.getConstant(ShiftAmt, dl, ShiftAmtTy)); + } + return Res; + } // If the output type is also a vector and widening it to the same size // as the widened input type would be a legal type, we can widen the bitcast // and handle the promotion after. @@ -365,15 +383,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { CreateStackStoreLoad(InOp, OutVT)); } -// Helper for BSWAP/BITREVERSE promotion to ensure we can fit the shift amount +// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount // in the VT returned by getShiftAmountTy and to return a safe VT if we can't. -static EVT getShiftAmountTyForConstant(unsigned Val, EVT VT, - const TargetLowering &TLI, +static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI, SelectionDAG &DAG) { EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - // If the value won't fit in the prefered type, just use something safe. It - // will be legalized when the shift is expanded. - if ((Log2_32(Val) + 1) > ShiftVT.getScalarSizeInBits()) + // If any possible shift value won't fit in the prefered type, just use + // something safe. It will be legalized when the shift is expanded. + if (!ShiftVT.isVector() && + ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits())) ShiftVT = MVT::i32; return ShiftVT; } @@ -385,7 +403,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); } @@ -397,7 +415,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { SDLoc dl(N); unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(DiffBits, NVT, TLI, DAG); + EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); @@ -592,8 +610,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - N->getMask(), ExtPassThru, N->getMemoryVT(), - N->getMemOperand(), ISD::EXTLOAD); + N->getOffset(), N->getMask(), ExtPassThru, + N->getMemoryVT(), N->getMemOperand(), + N->getAddressingMode(), ISD::EXTLOAD); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -604,7 +623,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); assert(NVT == ExtPassThru.getValueType() && - "Gather result type and the passThru agrument type should be the same"); + "Gather result type and the passThru argument type should be the same"); SDLoc dl(N); SDValue Ops[] = {N->getChain(), ExtPassThru, N->getMask(), N->getBasePtr(), @@ -762,6 +781,71 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { N->getOperand(2)); } +static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS, + unsigned Scale, const TargetLowering &TLI, + SelectionDAG &DAG) { + EVT VT = LHS.getValueType(); + bool Signed = N->getOpcode() == ISD::SDIVFIX; + + SDLoc dl(N); + // See if we can perform the division in this type without widening. + if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale, + DAG)) + return V; + + // If that didn't work, double the type width and try again. That must work, + // or something is wrong. + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getScalarSizeInBits() * 2); + if (Signed) { + LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT); + RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT); + } else { + LHS = DAG.getZExtOrTrunc(LHS, dl, WideVT); + RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT); + } + + // TODO: Saturation. + + SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale, + DAG); + assert(Res && "Expanding DIVFIX with wide type failed?"); + return DAG.getZExtOrTrunc(Res, dl, VT); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) { + SDLoc dl(N); + SDValue Op1Promoted, Op2Promoted; + bool Signed = N->getOpcode() == ISD::SDIVFIX; + if (Signed) { + Op1Promoted = SExtPromotedInteger(N->getOperand(0)); + Op2Promoted = SExtPromotedInteger(N->getOperand(1)); + } else { + Op1Promoted = ZExtPromotedInteger(N->getOperand(0)); + Op2Promoted = ZExtPromotedInteger(N->getOperand(1)); + } + EVT PromotedType = Op1Promoted.getValueType(); + unsigned Scale = N->getConstantOperandVal(2); + + SDValue Res; + // If the type is already legal and the operation is legal in that type, we + // should not early expand. + if (TLI.isTypeLegal(PromotedType)) { + TargetLowering::LegalizeAction Action = + TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale); + if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) + Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, + Op2Promoted, N->getOperand(2)); + } + + if (!Res) + Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG); + + // TODO: Saturation. + + return Res; +} + SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { if (ResNo == 1) return PromoteIntRes_Overflow(N); @@ -816,7 +900,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - EVT InVT = N->getOperand(0).getValueType(); + unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; + EVT InVT = N->getOperand(OpNo).getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT SVT = getSetCCResultType(InVT); @@ -835,12 +920,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { } SDLoc dl(N); - assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && + assert(SVT.isVector() == N->getOperand(OpNo).getValueType().isVector() && "Vector compare must return a vector result!"); // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), - N->getOperand(1), N->getOperand(2)); + SDValue SetCC; + if (N->isStrictFPOpcode()) { + EVT VTs[] = {SVT, MVT::Other}; + SDValue Opers[] = {N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)}; + SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1)); + } else + SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); // Convert to the expected type. return DAG.getSExtOrTrunc(SetCC, dl, NVT); @@ -1058,8 +1153,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. unsigned Shift = SmallVT.getScalarSizeInBits(); - EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(), - TLI, DAG); + EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG); SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, DAG.getConstant(Shift, DL, ShiftTy)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, @@ -1176,6 +1270,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; + case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N), @@ -1189,6 +1284,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; @@ -1209,7 +1305,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: - case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break; + case ISD::UMULFIXSAT: + case ISD::SDIVFIX: + case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break; case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; @@ -1465,6 +1563,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SExtPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + SExtPromotedInteger(N->getOperand(1))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type 
legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); @@ -1486,11 +1589,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, SDLoc dl(N); bool TruncateStore = false; - if (OpNo == 3) { + if (OpNo == 4) { Mask = PromoteTargetBoolean(Mask, DataVT); // Update in place. SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); - NewOps[3] = Mask; + NewOps[4] = Mask; return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } else { // Data operand assert(OpNo == 1 && "Unexpected operand for promotion"); @@ -1498,14 +1601,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, TruncateStore = true; } - return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, - N->getMemoryVT(), N->getMemOperand(), + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), + N->getOffset(), Mask, N->getMemoryVT(), + N->getMemOperand(), N->getAddressingMode(), TruncateStore, N->isCompressingStore()); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) { - assert(OpNo == 2 && "Only know how to promote the mask!"); + assert(OpNo == 3 && "Only know how to promote the mask!"); EVT DataVT = N->getValueType(0); SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); @@ -1563,6 +1667,11 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { ZExtPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { SDLoc dl(N); SDValue Op = GetPromotedInteger(N->getOperand(0)); @@ -1584,7 +1693,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntOp_FIX(SDNode *N) { SDValue Op2 = ZExtPromotedInteger(N->getOperand(2)); return SDValue( DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0); @@ -1697,10 +1806,14 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; - case ISD::LLROUND: ExpandIntRes_LLROUND(N, Lo, Hi); break; - case ISD::LLRINT: ExpandIntRes_LLRINT(N, Lo, Hi); break; + case ISD::STRICT_LLROUND: + case ISD::STRICT_LLRINT: + case ISD::LLROUND: + case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; @@ -1794,6 +1907,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break; + case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: @@ -1817,7 +1933,11 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { RTLIB::Libcall LC = 
RTLIB::getSYNC(Opc, VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - return ExpandChainLibCall(LC, Node, false); + EVT RetVT = Node->getValueType(0); + SmallVector<SDValue, 4> Ops(Node->op_begin() + 1, Node->op_end()); + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, SDLoc(Node), + Node->getOperand(0)); } /// N is a shift by a value that needs to be expanded, @@ -2304,11 +2424,27 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, SDValue Ovf; - bool HasOpCarry = TLI.isOperationLegalOrCustom( - N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY, - TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); + unsigned CarryOp, NoCarryOp; + ISD::CondCode Cond; + switch(N->getOpcode()) { + case ISD::UADDO: + CarryOp = ISD::ADDCARRY; + NoCarryOp = ISD::ADD; + Cond = ISD::SETULT; + break; + case ISD::USUBO: + CarryOp = ISD::SUBCARRY; + NoCarryOp = ISD::SUB; + Cond = ISD::SETUGT; + break; + default: + llvm_unreachable("Node has unexpected Opcode"); + } - if (HasOpCarry) { + bool HasCarryOp = TLI.isOperationLegalOrCustom( + CarryOp, TLI.getTypeToExpandTo(*DAG.getContext(), LHS.getValueType())); + + if (HasCarryOp) { // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; GetExpandedInteger(LHS, LHSL, LHSH); @@ -2317,22 +2453,19 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, SDValue LoOps[2] = { LHSL, RHSL }; SDValue HiOps[3] = { LHSH, RHSH }; - unsigned Opc = N->getOpcode() == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY; Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(Opc, dl, VTList, HiOps); + Hi = DAG.getNode(CarryOp, dl, VTList, HiOps); Ovf = Hi.getValue(1); } else { // Expand the result by simply replacing it with the equivalent // non-overflow-checking operation. - auto Opc = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; - SDValue Sum = DAG.getNode(Opc, dl, LHS.getValueType(), LHS, RHS); + SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS); SplitInteger(Sum, Lo, Hi); // Calculate the overflow: addition overflows iff a + b < a, and subtraction // overflows iff a - b > a. - auto Cond = N->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond); } @@ -2544,7 +2677,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 
1 : 0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); @@ -2552,8 +2687,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op, + CallOptions, dl, Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, @@ -2561,75 +2700,94 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT VT = N->getValueType(0); - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op, + CallOptions, dl, Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (IsStrict) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } -void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo, - SDValue &Hi) { - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - if (VT == MVT::f32) - LC = RTLIB::LLROUND_F32; - else if (VT == MVT::f64) - LC = RTLIB::LLROUND_F64; - else if (VT == MVT::f80) - LC = RTLIB::LLROUND_F80; - else if (VT == MVT::f128) - LC = RTLIB::LLROUND_F128; - else if (VT == MVT::ppcf128) - LC = RTLIB::LLROUND_PPCF128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!"); +void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 
1 : 0); - SDValue Op = N->getOperand(0); - if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) - Op = GetPromotedFloat(Op); + assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat && + "Input type needs to be promoted!"); - SDLoc dl(N); - EVT RetVT = N->getValueType(0); - TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, - Lo, Hi); -} + EVT VT = Op.getValueType(); -void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo, - SDValue &Hi) { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; - if (VT == MVT::f32) - LC = RTLIB::LLRINT_F32; - else if (VT == MVT::f64) - LC = RTLIB::LLRINT_F64; - else if (VT == MVT::f80) - LC = RTLIB::LLRINT_F80; - else if (VT == MVT::f128) - LC = RTLIB::LLRINT_F128; - else if (VT == MVT::ppcf128) - LC = RTLIB::LLRINT_PPCF128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!"); - - SDValue Op = N->getOperand(0); - if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) - Op = GetPromotedFloat(Op); + if (N->getOpcode() == ISD::LLROUND || + N->getOpcode() == ISD::STRICT_LLROUND) { + if (VT == MVT::f32) + LC = RTLIB::LLROUND_F32; + else if (VT == MVT::f64) + LC = RTLIB::LLROUND_F64; + else if (VT == MVT::f80) + LC = RTLIB::LLROUND_F80; + else if (VT == MVT::f128) + LC = RTLIB::LLROUND_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LLROUND_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!"); + } else if (N->getOpcode() == ISD::LLRINT || + N->getOpcode() == ISD::STRICT_LLRINT) { + if (VT == MVT::f32) + LC = RTLIB::LLRINT_F32; + else if (VT == MVT::f64) + LC = RTLIB::LLRINT_F64; + else if (VT == MVT::f80) + LC = RTLIB::LLRINT_F80; + else if (VT == MVT::f128) + LC = RTLIB::LLRINT_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LLRINT_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!"); + } else + llvm_unreachable("Unexpected opcode!"); SDLoc dl(N); EVT RetVT = N->getValueType(0); + SDValue Chain = N->isStrictFPOpcode() ? N->getOperand(0) : SDValue(); + TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, - Lo, Hi); + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT, + Op, CallOptions, dl, + Chain); + SplitInteger(Tmp.first, Lo, Hi); + + if (N->isStrictFPOpcode()) + ReplaceValueWith(SDValue(N, 1), Tmp.second); } void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi) { + if (N->isAtomic()) { + // It's typical to have larger CAS than atomic load instructions. + SDLoc dl(N); + EVT VT = N->getMemoryVT(); + SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, + VT, VTs, N->getOperand(0), + N->getOperand(1), Zero, Zero, N->getMemOperand()); + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); + ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); + return; + } + if (ISD::isNormalLoad(N)) { ExpandRes_NormalLoad(N, Lo, Hi); return; @@ -2684,8 +2842,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. 
unsigned IncrementSize = NVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); @@ -2709,8 +2866,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Alignment, MMOFlags, AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -3068,6 +3224,13 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); } +void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1), + N->getConstantOperandVal(2), TLI, DAG); + SplitInteger(Res, Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); @@ -3596,9 +3759,11 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; + case ISD::STRICT_SINT_TO_FP: case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; + case ISD::STRICT_UINT_TO_FP: case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; case ISD::SHL: @@ -3865,17 +4030,37 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { - SDValue Op = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT DstVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain); + + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { + if (N->isAtomic()) { + // It's typical to have larger CAS than atomic store instructions. 
+ SDLoc dl(N); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + N->getMemoryVT(), + N->getOperand(0), N->getOperand(2), + N->getOperand(1), + N->getMemOperand()); + return Swap.getValue(1); + } if (ISD::isNormalStore(N)) return ExpandOp_NormalStore(N, OpNo); @@ -3965,81 +4150,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { - SDValue Op = N->getOperand(0); - EVT SrcVT = Op.getValueType(); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT DstVT = N->getValueType(0); - SDLoc dl(N); - - // The following optimization is valid only if every value in SrcVT (when - // treated as signed) is representable in DstVT. Check that the mantissa - // size of DstVT is >= than the number of bits in SrcVT -1. - const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT); - if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 && - TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ - // Do a signed conversion then adjust the result. - SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); - SignedConv = TLI.LowerOperation(SignedConv, DAG); - - // The result of the signed conversion needs adjusting if the 'sign bit' of - // the incoming integer was set. To handle this, we dynamically test to see - // if it is set, and, if so, add a fudge factor. - - const uint64_t F32TwoE32 = 0x4F800000ULL; - const uint64_t F32TwoE64 = 0x5F800000ULL; - const uint64_t F32TwoE128 = 0x7F800000ULL; - - APInt FF(32, 0); - if (SrcVT == MVT::i32) - FF = APInt(32, F32TwoE32); - else if (SrcVT == MVT::i64) - FF = APInt(32, F32TwoE64); - else if (SrcVT == MVT::i128) - FF = APInt(32, F32TwoE128); - else - llvm_unreachable("Unsupported UINT_TO_FP!"); - - // Check whether the sign bit is set. - SDValue Lo, Hi; - GetExpandedInteger(Op, Lo, Hi); - SDValue SignSet = DAG.getSetCC(dl, - getSetCCResultType(Hi.getValueType()), - Hi, - DAG.getConstant(0, dl, Hi.getValueType()), - ISD::SETLT); - - // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. - SDValue FudgePtr = - DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)), - TLI.getPointerTy(DAG.getDataLayout())); - - // Get a pointer to FF if the sign bit was set, or to 0 otherwise. - SDValue Zero = DAG.getIntPtrConstant(0, dl); - SDValue Four = DAG.getIntPtrConstant(4, dl); - if (DAG.getDataLayout().isBigEndian()) - std::swap(Zero, Four); - SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, - Zero, Four); - unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); - FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(), - FudgePtr, Offset); - Alignment = std::min(Alignment, 4u); - - // Load the value out, extending it from f32 to the destination float type. - // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad( - ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, - Alignment); - return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); - } - - // Otherwise, use a libcall. 
- RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); + RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); - return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain); + + if (!IsStrict) + return Tmp.first; + + ReplaceValueWith(SDValue(N, 1), Tmp.second); + ReplaceValueWith(SDValue(N, 0), Tmp.first); + return SDValue(); } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index b596c174a287..63ddb59fce68 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -204,7 +204,8 @@ bool DAGTypeLegalizer::run() { // non-leaves. for (SDNode &Node : DAG.allnodes()) { if (Node.getNumOperands() == 0) { - AddToWorklist(&Node); + Node.setNodeId(ReadyToProcess); + Worklist.push_back(&Node); } else { Node.setNodeId(Unanalyzed); } @@ -974,68 +975,6 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } -/// Convert the node into a libcall with the same prototype. -SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, - bool isSigned) { - TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(isSigned); - unsigned NumOps = N->getNumOperands(); - SDLoc dl(N); - if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions, - dl).first; - } else if (NumOps == 1) { - SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions, - dl).first; - } else if (NumOps == 2) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, - dl).first; - } - SmallVector<SDValue, 8> Ops(NumOps); - for (unsigned i = 0; i < NumOps; ++i) - Ops[i] = N->getOperand(i); - - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; -} - -/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that -/// the first operand is the in-chain. -std::pair<SDValue, SDValue> -DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, - bool isSigned) { - SDValue InChain = Node->getOperand(0); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { - EVT ArgVT = Node->getOperand(i).getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Node->getOperand(i); - Entry.Ty = ArgTy; - Entry.IsSExt = isSigned; - Entry.IsZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); - - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(SDLoc(Node)) - .setChain(InChain) - .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, - std::move(Args)) - .setSExtResult(isSigned) - .setZExtResult(!isSigned); - - std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo; -} - /// Promote the given target boolean to a target boolean of the given type. 
/// A target boolean is an integer value, not necessarily of type i1, the bits /// of which conform to getBooleanContents. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 4afbae69128a..faae14444d51 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -215,10 +215,7 @@ private: SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); SDValue JoinIntegers(SDValue Lo, SDValue Hi); - SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, - SDNode *Node, bool isSigned); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); @@ -228,11 +225,6 @@ private: void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi); - void AddToWorklist(SDNode *N) { - N->setNodeId(ReadyToProcess); - Worklist.push_back(N); - } - //===--------------------------------------------------------------------===// // Integer Promotion Support: LegalizeIntegerTypes.cpp //===--------------------------------------------------------------------===// @@ -337,6 +329,7 @@ private: SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); + SDValue PromoteIntRes_DIVFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); SDValue PromoteIntRes_VECREDUCE(SDNode *N); SDValue PromoteIntRes_ABS(SDNode *N); @@ -362,9 +355,11 @@ private: SDValue PromoteIntOp_Shift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N); SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_TRUNCATE(SDNode *N); SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N); SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); @@ -373,7 +368,7 @@ private: SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_MULFIX(SDNode *N); + SDValue PromoteIntOp_FIX(SDNode *N); SDValue PromoteIntOp_FPOWI(SDNode *N); SDValue PromoteIntOp_VECREDUCE(SDNode *N); @@ -411,8 +406,7 @@ private: void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_LLROUND (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -435,6 +429,7 @@ private: void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_DIVFIX (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -486,6 +481,8 @@ private: // Convert Float Results to Integer. 
void SoftenFloatResult(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC); + SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); @@ -495,6 +492,7 @@ private: SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCBRT(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); @@ -530,9 +528,9 @@ private: // Convert Float Operand to Integer. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); - SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); SDValue SoftenFloatOp_LROUND(SDNode *N); @@ -542,6 +540,7 @@ private: SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Float Expansion Support: LegalizeFloatTypes.cpp @@ -559,10 +558,15 @@ private: // Float Result Expansion. void ExpandFloatResult(SDNode *N, unsigned ResNo); void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_Unary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_Binary(SDNode *N, RTLIB::Libcall LC, + SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCBRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -676,7 +680,6 @@ private: SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); - SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); @@ -688,7 +691,7 @@ private: SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); - SDValue ScalarizeVecRes_MULFIX(SDNode *N); + SDValue ScalarizeVecRes_FIX(SDNode *N); // Vector Operand Scalarization: <1 x ty> -> ty. 
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -730,7 +733,7 @@ private: void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); - void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -804,6 +807,7 @@ private: SDValue WidenVSELECTAndMask(SDNode *N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_SETCC(SDNode* N); + SDValue WidenVecRes_STRICT_FSETCC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); @@ -833,6 +837,7 @@ private: SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); + SDValue WidenVecOp_STRICT_FSETCC(SDNode* N); SDValue WidenVecOp_VSELECT(SDNode *N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 5562f400b6e1..c45c62cabc05 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -169,9 +169,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, dl, - StackPtr.getValueType())); + StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, @@ -248,6 +246,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); + assert(!LD->isAtomic() && "Atomics can not be split"); EVT ValueVT = LD->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); @@ -262,8 +261,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize), @@ -459,6 +457,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDLoc dl(N); StoreSDNode *St = cast<StoreSDNode>(N); + assert(!St->isAtomic() && "Atomics can not be split"); EVT ValueVT = St->getValue().getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 15c3a0b6cfad..7d0b1ee6ae07 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -75,95 +75,95 @@ class VectorLegalizer { SDValue LegalizeOp(SDValue Op); /// Assuming the node is legal, "legalize" the results. 
- SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); + SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result); + + /// Make sure Results are legal and update the translation cache. + SDValue RecursivelyLegalizeResults(SDValue Op, + MutableArrayRef<SDValue> Results); + + /// Wrapper to interface LowerOperation with a vector of Results. + /// Returns false if the target wants to use default expansion. Otherwise + /// returns true. If return is true and the Results are empty, then the + /// target wants to keep the input node as is. + bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results); /// Implements unrolling a VSETCC. - SDValue UnrollVSETCC(SDValue Op); + SDValue UnrollVSETCC(SDNode *Node); /// Implement expand-based legalization of vector operations. /// /// This is just a high-level routine to dispatch to specific code paths for /// operations to legalize them. - SDValue Expand(SDValue Op); + void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if /// FP_TO_SINT isn't legal. - SDValue ExpandFP_TO_UINT(SDValue Op); + void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if /// SINT_TO_FLOAT and SHR on vectors isn't legal. - SDValue ExpandUINT_TO_FLOAT(SDValue Op); + void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. - SDValue ExpandSEXTINREG(SDValue Op); + SDValue ExpandSEXTINREG(SDNode *Node); /// Implement expansion for ANY_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place and bitcasts to the proper /// type. The contents of the bits in the extended part of each element are /// undef. - SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); + SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node); /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place, bitcasts to the proper /// type, then shifts left and arithmetic shifts right to introduce a sign /// extension. - SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); + SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node); /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place and blends zeros into /// the remaining lanes, finally bitcasting to the proper type. - SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); - - /// Implement expand-based legalization of ABS vector operations. - /// If following expanding is legal/custom then do it: - /// (ABS x) --> (XOR (ADD x, (SRA x, sizeof(x)-1)), (SRA x, sizeof(x)-1)) - /// else unroll the operation. - SDValue ExpandABS(SDValue Op); + SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node); /// Expand bswap of vectors into a shuffle if legal. - SDValue ExpandBSWAP(SDValue Op); + SDValue ExpandBSWAP(SDNode *Node); /// Implement vselect in terms of XOR, AND, OR when blend is not /// supported by the target. 
- SDValue ExpandVSELECT(SDValue Op); - SDValue ExpandSELECT(SDValue Op); - SDValue ExpandLoad(SDValue Op); - SDValue ExpandStore(SDValue Op); - SDValue ExpandFNEG(SDValue Op); - SDValue ExpandFSUB(SDValue Op); - SDValue ExpandBITREVERSE(SDValue Op); - SDValue ExpandCTPOP(SDValue Op); - SDValue ExpandCTLZ(SDValue Op); - SDValue ExpandCTTZ(SDValue Op); - SDValue ExpandFunnelShift(SDValue Op); - SDValue ExpandROT(SDValue Op); - SDValue ExpandFMINNUM_FMAXNUM(SDValue Op); - SDValue ExpandUADDSUBO(SDValue Op); - SDValue ExpandSADDSUBO(SDValue Op); - SDValue ExpandMULO(SDValue Op); - SDValue ExpandAddSubSat(SDValue Op); - SDValue ExpandFixedPointMul(SDValue Op); - SDValue ExpandStrictFPOp(SDValue Op); + SDValue ExpandVSELECT(SDNode *Node); + SDValue ExpandSELECT(SDNode *Node); + std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); + SDValue ExpandStore(SDNode *N); + SDValue ExpandFNEG(SDNode *Node); + void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); + SDValue ExpandFixedPointDiv(SDNode *Node); + SDValue ExpandStrictFPOp(SDNode *Node); + void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements vector promotion. /// /// This is essentially just bitcasting the operands to a different type and /// bitcasting the result back to the original type. - SDValue Promote(SDValue Op); + void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements [SU]INT_TO_FP vector promotion. /// /// This is a [zs]ext of the input operand to a larger integer type. - SDValue PromoteINT_TO_FP(SDValue Op); + void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results); /// Implements FP_TO_[SU]INT vector promotion of the result type. /// /// It is promoted to a larger integer type. The result is then /// truncated back to the original type. - SDValue PromoteFP_TO_INT(SDValue Op); + void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results); public: VectorLegalizer(SelectionDAG& dag) : @@ -219,11 +219,27 @@ bool VectorLegalizer::Run() { return Changed; } -SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { +SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) { + assert(Op->getNumValues() == Result->getNumValues() && + "Unexpected number of results"); // Generic legalization: just pass the operand through. - for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) - AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); - return Result.getValue(Op.getResNo()); + for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i) + AddLegalizedOperand(Op.getValue(i), SDValue(Result, i)); + return SDValue(Result, Op.getResNo()); +} + +SDValue +VectorLegalizer::RecursivelyLegalizeResults(SDValue Op, + MutableArrayRef<SDValue> Results) { + assert(Results.size() == Op->getNumValues() && + "Unexpected number of results"); + // Make sure that the generated code is itself legal. 
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) { + Results[i] = LegalizeOp(Results[i]); + AddLegalizedOperand(Op.getValue(i), Results[i]); + } + + return Results[Op.getResNo()]; } SDValue VectorLegalizer::LegalizeOp(SDValue Op) { @@ -232,18 +248,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); if (I != LegalizedNodes.end()) return I->second; - SDNode* Node = Op.getNode(); - // Legalize the operands SmallVector<SDValue, 8> Ops; - for (const SDValue &Op : Node->op_values()) - Ops.push_back(LegalizeOp(Op)); + for (const SDValue &Oper : Op->op_values()) + Ops.push_back(LegalizeOp(Oper)); - SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), - Op.getResNo()); + SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops); if (Op.getOpcode() == ISD::LOAD) { - LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + LoadSDNode *LD = cast<LoadSDNode>(Node); ISD::LoadExtType ExtType = LD->getExtensionType(); if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: "; @@ -252,26 +265,29 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { LD->getMemoryVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: - return TranslateLegalizeResults(Op, Result); - case TargetLowering::Custom: - if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { - assert(Lowered->getNumValues() == Op->getNumValues() && - "Unexpected number of results"); - if (Lowered != Result) { - // Make sure the new code is also legal. - Lowered = LegalizeOp(Lowered); - Changed = true; - } - return TranslateLegalizeResults(Op, Lowered); + return TranslateLegalizeResults(Op, Node); + case TargetLowering::Custom: { + SmallVector<SDValue, 2> ResultVals; + if (LowerOperationWrapper(Node, ResultVals)) { + if (ResultVals.empty()) + return TranslateLegalizeResults(Op, Node); + + Changed = true; + return RecursivelyLegalizeResults(Op, ResultVals); } LLVM_FALLTHROUGH; - case TargetLowering::Expand: + } + case TargetLowering::Expand: { Changed = true; - return ExpandLoad(Op); + std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node); + AddLegalizedOperand(Op.getValue(0), Tmp.first); + AddLegalizedOperand(Op.getValue(1), Tmp.second); + return Op.getResNo() ? Tmp.first : Tmp.second; + } } } } else if (Op.getOpcode() == ISD::STORE) { - StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + StoreSDNode *ST = cast<StoreSDNode>(Node); EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) { @@ -280,19 +296,24 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: - return TranslateLegalizeResults(Op, Result); + return TranslateLegalizeResults(Op, Node); case TargetLowering::Custom: { - SDValue Lowered = TLI.LowerOperation(Result, DAG); - if (Lowered != Result) { - // Make sure the new code is also legal. 
- Lowered = LegalizeOp(Lowered); + SmallVector<SDValue, 1> ResultVals; + if (LowerOperationWrapper(Node, ResultVals)) { + if (ResultVals.empty()) + return TranslateLegalizeResults(Op, Node); + Changed = true; + return RecursivelyLegalizeResults(Op, ResultVals); } - return TranslateLegalizeResults(Op, Lowered); + LLVM_FALLTHROUGH; } - case TargetLowering::Expand: + case TargetLowering::Expand: { Changed = true; - return ExpandStore(Op); + SDValue Chain = ExpandStore(Node); + AddLegalizedOperand(Op, Chain); + return Chain; + } } } } @@ -300,55 +321,41 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { bool HasVectorValueOrOp = false; for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J) HasVectorValueOrOp |= J->isVector(); - for (const SDValue &Op : Node->op_values()) - HasVectorValueOrOp |= Op.getValueType().isVector(); + for (const SDValue &Oper : Node->op_values()) + HasVectorValueOrOp |= Oper.getValueType().isVector(); if (!HasVectorValueOrOp) - return TranslateLegalizeResults(Op, Result); + return TranslateLegalizeResults(Op, Node); TargetLowering::LegalizeAction Action = TargetLowering::Legal; + EVT ValVT; switch (Op.getOpcode()) { default: - return TranslateLegalizeResults(Op, Result); - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_EXTEND: + return TranslateLegalizeResults(Op, Node); + case ISD::MERGE_VALUES: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + // This operation lies about being legal: when it claims to be legal, + // it should actually be expanded. + if (Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + break; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" + ValVT = Node->getValueType(0); + if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP || + Op.getOpcode() == ISD::STRICT_UINT_TO_FP) + ValVT = Node->getOperand(1).getValueType(); + Action = TLI.getOperationAction(Node->getOpcode(), ValVT); // If we're asked to expand a strict vector floating-point operation, // by default we're going to simply unroll it. That is usually the // best approach, except in the case where the resulting strict (scalar) // operations would themselves use the fallback mutation to non-strict. // In that specific case, just do the fallback on the vector op. 
- if (Action == TargetLowering::Expand && - TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)) - == TargetLowering::Legal) { - EVT EltVT = Node->getValueType(0).getVectorElementType(); + if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() && + TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) == + TargetLowering::Legal) { + EVT EltVT = ValVT.getVectorElementType(); if (TLI.getOperationAction(Node->getOpcode(), EltVT) == TargetLowering::Expand && TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) @@ -454,7 +461,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: - case ISD::UMULFIXSAT: { + case ISD::UMULFIXSAT: + case ISD::SDIVFIX: + case ISD::UDIVFIX: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -482,53 +491,90 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); + SmallVector<SDValue, 8> ResultVals; switch (Action) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: - Result = Promote(Op); - Changed = true; + LLVM_DEBUG(dbgs() << "Promoting\n"); + Promote(Node, ResultVals); + assert(!ResultVals.empty() && "No results for promotion?"); break; case TargetLowering::Legal: LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); break; - case TargetLowering::Custom: { + case TargetLowering::Custom: LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); - if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { - LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); - Result = Tmp1; + if (LowerOperationWrapper(Node, ResultVals)) break; - } LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; - } case TargetLowering::Expand: - Result = Expand(Op); + LLVM_DEBUG(dbgs() << "Expanding\n"); + Expand(Node, ResultVals); + break; } - // Make sure that the generated code is itself legal. - if (Result != Op) { - Result = LegalizeOp(Result); - Changed = true; + if (ResultVals.empty()) + return TranslateLegalizeResults(Op, Node); + + Changed = true; + return RecursivelyLegalizeResults(Op, ResultVals); +} + +// FIME: This is very similar to the X86 override of +// TargetLowering::LowerOperationWrapper. Can we merge them somehow? +bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + + if (!Res.getNode()) + return false; + + if (Res == SDValue(Node, 0)) + return true; + + // If the original node has one result, take the return value from + // LowerOperation as is. It might not be result number 0. + if (Node->getNumValues() == 1) { + Results.push_back(Res); + return true; } - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - AddLegalizedOperand(Op, Result); - return Result; + // If the original node has multiple results, then the return node should + // have the same number of results. + assert((Node->getNumValues() == Res->getNumValues()) && + "Lowering returned the wrong number of results!"); + + // Places new result values base on N result number. 
+ for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I) + Results.push_back(Res.getValue(I)); + + return true; } -SDValue VectorLegalizer::Promote(SDValue Op) { +void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { // For a few operations there is a specific concept for promotion based on // the operand's type. - switch (Op.getOpcode()) { + switch (Node->getOpcode()) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: // "Promote" the operation by extending the operand. - return PromoteINT_TO_FP(Op); + PromoteINT_TO_FP(Node, Results); + return; case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: // Promote the operation by extending the operand. - return PromoteFP_TO_INT(Op); + PromoteFP_TO_INT(Node, Results); + return; + case ISD::FP_ROUND: + case ISD::FP_EXTEND: + // These operations are used to do promotion so they can't be promoted + // themselves. + llvm_unreachable("Don't know how to promote this operation!"); } // There are currently two cases of vector promotion: @@ -536,91 +582,128 @@ SDValue VectorLegalizer::Promote(SDValue Op) { // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. // 2) Extending a vector of floats to a vector of the same number of larger // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. - MVT VT = Op.getSimpleValueType(); - assert(Op.getNode()->getNumValues() == 1 && + assert(Node->getNumValues() == 1 && "Can't promote a vector with multiple results!"); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); - SDLoc dl(Op); - SmallVector<SDValue, 4> Operands(Op.getNumOperands()); - - for (unsigned j = 0; j != Op.getNumOperands(); ++j) { - if (Op.getOperand(j).getValueType().isVector()) - if (Op.getOperand(j) + MVT VT = Node->getSimpleValueType(0); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + SDLoc dl(Node); + SmallVector<SDValue, 4> Operands(Node->getNumOperands()); + + for (unsigned j = 0; j != Node->getNumOperands(); ++j) { + if (Node->getOperand(j).getValueType().isVector()) + if (Node->getOperand(j) .getValueType() .getVectorElementType() .isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) - Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); + Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j)); else - Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j)); else - Operands[j] = Op.getOperand(j); + Operands[j] = Node->getOperand(j); } - Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); + SDValue Res = + DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags()); + if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) - return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); + Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl)); else - return DAG.getNode(ISD::BITCAST, dl, VT, Op); + Res = DAG.getNode(ISD::BITCAST, dl, VT, Res); + + Results.push_back(Res); } -SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { +void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { // INT_TO_FP operations may require the input operand be promoted even // when the type is 
otherwise legal. - MVT VT = Op.getOperand(0).getSimpleValueType(); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + bool IsStrict = Node->isStrictFPOpcode(); + MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType(); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && "Vectors have different number of elements!"); - SDLoc dl(Op); - SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + SDLoc dl(Node); + SmallVector<SDValue, 4> Operands(Node->getNumOperands()); - unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : - ISD::SIGN_EXTEND; - for (unsigned j = 0; j != Op.getNumOperands(); ++j) { - if (Op.getOperand(j).getValueType().isVector()) - Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); + unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::STRICT_UINT_TO_FP) + ? ISD::ZERO_EXTEND + : ISD::SIGN_EXTEND; + for (unsigned j = 0; j != Node->getNumOperands(); ++j) { + if (Node->getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j)); else - Operands[j] = Op.getOperand(j); + Operands[j] = Node->getOperand(j); + } + + if (IsStrict) { + SDValue Res = DAG.getNode(Node->getOpcode(), dl, + {Node->getValueType(0), MVT::Other}, Operands); + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + return; } - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands); + SDValue Res = + DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands); + Results.push_back(Res); } // For FP_TO_INT we promote the result type to a vector type with wider // elements and then truncate the result. This is different from the default // PromoteVector which uses bitcast to promote thus assumning that the // promoted vector type has the same overall size. -SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) { - MVT VT = Op.getSimpleValueType(); - MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); +void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + MVT VT = Node->getSimpleValueType(0); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + bool IsStrict = Node->isStrictFPOpcode(); assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && "Vectors have different number of elements!"); - unsigned NewOpc = Op->getOpcode(); + unsigned NewOpc = Node->getOpcode(); // Change FP_TO_UINT to FP_TO_SINT if possible. // TODO: Should we only do this if FP_TO_UINT itself isn't legal? if (NewOpc == ISD::FP_TO_UINT && TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; - SDLoc dl(Op); - SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0)); + if (NewOpc == ISD::STRICT_FP_TO_UINT && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + + SDLoc dl(Node); + SDValue Promoted, Chain; + if (IsStrict) { + Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Chain = Promoted.getValue(1); + } else + Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the // original operation was undefined anyway, so the assert is still correct. - Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? 
ISD::AssertZext - : ISD::AssertSext, - dl, NVT, Promoted, + if (Node->getOpcode() == ISD::FP_TO_UINT || + Node->getOpcode() == ISD::STRICT_FP_TO_UINT) + NewOpc = ISD::AssertZext; + else + NewOpc = ISD::AssertSext; + + Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted, DAG.getValueType(VT.getScalarType())); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); + Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); + Results.push_back(Promoted); + if (IsStrict) + Results.push_back(Chain); } -SDValue VectorLegalizer::ExpandLoad(SDValue Op) { - LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); +std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); EVT SrcVT = LD->getMemoryVT(); EVT SrcEltVT = SrcVT.getScalarType(); @@ -629,7 +712,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue NewChain; SDValue Value; if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { - SDLoc dl(Op); + SDLoc dl(N); SmallVector<SDValue, 8> Vals; SmallVector<SDValue, 8> LoadChains; @@ -741,130 +824,157 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals); + Value = DAG.getBuildVector(N->getValueType(0), dl, Vals); } else { - SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); - // Skip past MERGE_VALUE node if known. - if (Scalarized->getOpcode() == ISD::MERGE_VALUES) { - NewChain = Scalarized.getOperand(1); - Value = Scalarized.getOperand(0); - } else { - NewChain = Scalarized.getValue(1); - Value = Scalarized.getValue(0); - } + std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); } - AddLegalizedOperand(Op.getValue(0), Value); - AddLegalizedOperand(Op.getValue(1), NewChain); - - return (Op.getResNo() ? 
NewChain : Value); + return std::make_pair(Value, NewChain); } -SDValue VectorLegalizer::ExpandStore(SDValue Op) { - StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); +SDValue VectorLegalizer::ExpandStore(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); SDValue TF = TLI.scalarizeVectorStore(ST, DAG); - AddLegalizedOperand(Op, TF); return TF; } -SDValue VectorLegalizer::Expand(SDValue Op) { - switch (Op->getOpcode()) { +void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { + SDValue Tmp; + switch (Node->getOpcode()) { + case ISD::MERGE_VALUES: + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + Results.push_back(Node->getOperand(i)); + return; case ISD::SIGN_EXTEND_INREG: - return ExpandSEXTINREG(Op); + Results.push_back(ExpandSEXTINREG(Node)); + return; case ISD::ANY_EXTEND_VECTOR_INREG: - return ExpandANY_EXTEND_VECTOR_INREG(Op); + Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node)); + return; case ISD::SIGN_EXTEND_VECTOR_INREG: - return ExpandSIGN_EXTEND_VECTOR_INREG(Op); + Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node)); + return; case ISD::ZERO_EXTEND_VECTOR_INREG: - return ExpandZERO_EXTEND_VECTOR_INREG(Op); + Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node)); + return; case ISD::BSWAP: - return ExpandBSWAP(Op); + Results.push_back(ExpandBSWAP(Node)); + return; case ISD::VSELECT: - return ExpandVSELECT(Op); + Results.push_back(ExpandVSELECT(Node)); + return; case ISD::SELECT: - return ExpandSELECT(Op); + Results.push_back(ExpandSELECT(Node)); + return; case ISD::FP_TO_UINT: - return ExpandFP_TO_UINT(Op); + ExpandFP_TO_UINT(Node, Results); + return; case ISD::UINT_TO_FP: - return ExpandUINT_TO_FLOAT(Op); + ExpandUINT_TO_FLOAT(Node, Results); + return; case ISD::FNEG: - return ExpandFNEG(Op); + Results.push_back(ExpandFNEG(Node)); + return; case ISD::FSUB: - return ExpandFSUB(Op); + ExpandFSUB(Node, Results); + return; case ISD::SETCC: - return UnrollVSETCC(Op); + Results.push_back(UnrollVSETCC(Node)); + return; case ISD::ABS: - return ExpandABS(Op); + if (TLI.expandABS(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::BITREVERSE: - return ExpandBITREVERSE(Op); + ExpandBITREVERSE(Node, Results); + return; case ISD::CTPOP: - return ExpandCTPOP(Op); + if (TLI.expandCTPOP(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - return ExpandCTLZ(Op); + if (TLI.expandCTLZ(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - return ExpandCTTZ(Op); + if (TLI.expandCTTZ(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::FSHL: case ISD::FSHR: - return ExpandFunnelShift(Op); + if (TLI.expandFunnelShift(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::ROTL: case ISD::ROTR: - return ExpandROT(Op); + if (TLI.expandROT(Node, Tmp, DAG)) { + Results.push_back(Tmp); + return; + } + break; case ISD::FMINNUM: case ISD::FMAXNUM: - return ExpandFMINNUM_FMAXNUM(Op); + if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::UADDO: case ISD::USUBO: - return ExpandUADDSUBO(Op); + ExpandUADDSUBO(Node, Results); + return; case ISD::SADDO: case ISD::SSUBO: - return ExpandSADDSUBO(Op); + ExpandSADDSUBO(Node, Results); + return; case ISD::UMULO: case ISD::SMULO: - return ExpandMULO(Op); + ExpandMULO(Node, Results); + return; case ISD::USUBSAT: case ISD::SSUBSAT: case ISD::UADDSAT: case 
ISD::SADDSAT: - return ExpandAddSubSat(Op); + if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::SMULFIX: case ISD::UMULFIX: - return ExpandFixedPointMul(Op); + if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::SMULFIXSAT: case ISD::UMULFIXSAT: // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly // why. Maybe it results in worse codegen compared to the unroll for some // targets? This should probably be investigated. And if we still prefer to // unroll an explanation could be helpful. - return DAG.UnrollVectorOp(Op.getNode()); - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - return ExpandStrictFPOp(Op); + break; + case ISD::SDIVFIX: + case ISD::UDIVFIX: + Results.push_back(ExpandFixedPointDiv(Node)); + return; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" + ExpandStrictFPOp(Node, Results); + return; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: @@ -878,22 +988,23 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: - return TLI.expandVecReduce(Op.getNode(), DAG); - default: - return DAG.UnrollVectorOp(Op.getNode()); + Results.push_back(TLI.expandVecReduce(Node, DAG)); + return; } + + Results.push_back(DAG.UnrollVectorOp(Node)); } -SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { +SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it // using XOR AND OR. The selector bit is broadcasted. - EVT VT = Op.getValueType(); - SDLoc DL(Op); + EVT VT = Node->getValueType(0); + SDLoc DL(Node); - SDValue Mask = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - SDValue Op2 = Op.getOperand(2); + SDValue Mask = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); assert(VT.isVector() && !Mask.getValueType().isVector() && Op1.getValueType() == Op2.getValueType() && "Invalid type"); @@ -907,7 +1018,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); // Generate a mask operand. 
EVT MaskTy = VT.changeVectorElementTypeToInteger(); @@ -936,36 +1047,35 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); - return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); + return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); } -SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) { + EVT VT = Node->getValueType(0); // Make sure that the SRA and SHL instructions are available. if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); - SDLoc DL(Op); - EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); + SDLoc DL(Node); + EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT(); unsigned BW = VT.getScalarSizeInBits(); unsigned OrigBW = OrigTy.getScalarSizeInBits(); SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); - Op = Op.getOperand(0); - Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); + SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz); return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } // Generically expand a vector anyext in register to a shuffle of the relevant // lanes into the appropriate locations, with other lanes left undef. -SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { - SDLoc DL(Op); - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); int NumElements = VT.getVectorNumElements(); - SDValue Src = Op.getOperand(0); + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); int NumSrcElements = SrcVT.getVectorNumElements(); @@ -997,15 +1107,15 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); } -SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - SDValue Src = Op.getOperand(0); +SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); // First build an any-extend node which can be legalized above when we // recurse through it. - Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); + SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); // Now we need sign extend. Do this by shifting the elements. Even if these // aren't legal operations, they have a better chance of being legalized @@ -1021,11 +1131,11 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { // Generically expand a vector zext in register to a shuffle of the relevant // lanes into the appropriate locations, a blend of zero into the high bits, // and a bitcast to the wider element type. 
-SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { - SDLoc DL(Op); - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); int NumElements = VT.getVectorNumElements(); - SDValue Src = Op.getOperand(0); + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); int NumSrcElements = SrcVT.getVectorNumElements(); @@ -1068,8 +1178,8 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { ShuffleMask.push_back((I * ScalarSizeInBytes) + J); } -SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { + EVT VT = Node->getValueType(0); // Generate a byte wise shuffle mask for the BSWAP. SmallVector<int, 16> ShuffleMask; @@ -1078,20 +1188,24 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { // Only emit a shuffle if the mask is legal. if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); - SDLoc DL(Op); - Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + SDLoc DL(Node); + SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } -SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { - EVT VT = Op.getValueType(); +void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + EVT VT = Node->getValueType(0); // If we have the scalar operation, it's probably cheaper to unroll it. - if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) - return DAG.UnrollVectorOp(Op.getNode()); + if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) { + SDValue Tmp = DAG.UnrollVectorOp(Node); + Results.push_back(Tmp); + return; + } // If the vector element width is a whole number of bytes, test if its legal // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte @@ -1108,35 +1222,39 @@ SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) && TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) && TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) { - SDLoc DL(Op); - Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + SDLoc DL(Node); + SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), BSWAPMask); Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); + Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); + Results.push_back(Op); + return; } } // If we have the appropriate vector bit operations, it is better to use them // than unrolling and expanding each component. - if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || - !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || - !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) || - !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) - return DAG.UnrollVectorOp(Op.getNode()); - - // Let LegalizeDAG handle this later. - return Op; + if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) + // Let LegalizeDAG handle this later. + return; + + // Otherwise unroll. 
+ SDValue Tmp = DAG.UnrollVectorOp(Node); + Results.push_back(Tmp); } -SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { +SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. - SDLoc DL(Op); + SDLoc DL(Node); - SDValue Mask = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - SDValue Op2 = Op.getOperand(2); + SDValue Mask = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue Op2 = Node->getOperand(2); EVT VT = Mask.getValueType(); @@ -1152,13 +1270,13 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || TLI.getBooleanContents(Op1.getValueType()) != TargetLowering::ZeroOrNegativeOneBooleanContent) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); // If the mask and the type are different sizes, unroll the vector op. This // can occur when getSetCCResultType returns something that is different in // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. if (VT.getSizeInBits() != Op1.getValueSizeInBits()) - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because @@ -1173,46 +1291,61 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); - return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); -} - -SDValue VectorLegalizer::ExpandABS(SDValue Op) { - // Attempt to expand using TargetLowering. - SDValue Result; - if (TLI.expandABS(Op.getNode(), Result, DAG)) - return Result; - - // Otherwise go ahead and unroll. - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); } -SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { +void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { // Attempt to expand using TargetLowering. SDValue Result, Chain; - if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) { - if (Op.getNode()->isStrictFPOpcode()) - // Relink the chain - DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain); - return Result; + if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) { + Results.push_back(Result); + if (Node->isStrictFPOpcode()) + Results.push_back(Chain); + return; } // Otherwise go ahead and unroll. - return DAG.UnrollVectorOp(Op.getNode()); + if (Node->isStrictFPOpcode()) { + UnrollStrictFPOp(Node, Results); + return; + } + + Results.push_back(DAG.UnrollVectorOp(Node)); } -SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { - EVT VT = Op.getOperand(0).getValueType(); - SDLoc DL(Op); +void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + bool IsStrict = Node->isStrictFPOpcode(); + unsigned OpNo = IsStrict ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); + EVT VT = Src.getValueType(); + SDLoc DL(Node); // Attempt to expand using TargetLowering. SDValue Result; - if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG)) - return Result; + SDValue Chain; + if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) { + Results.push_back(Result); + if (IsStrict) + Results.push_back(Chain); + return; + } // Make sure that the SINT_TO_FP and SRL instructions are available. 
- if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) - return DAG.UnrollVectorOp(Op.getNode()); + if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) == + TargetLowering::Expand) || + (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) == + TargetLowering::Expand)) || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) { + if (IsStrict) { + UnrollStrictFPOp(Node, Results); + return; + } + + Results.push_back(DAG.UnrollVectorOp(Node)); + return; + } unsigned BW = VT.getScalarSizeInBits(); assert((BW == 64 || BW == 32) && @@ -1227,153 +1360,141 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType()); + SDValue TWOHW = + DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0)); // Clear upper part of LO, lower HI - SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); - SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); + SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord); + SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask); + + if (IsStrict) { + // Convert hi and lo to floats + // Convert the hi part back to the upper values + // TODO: Can any fast-math-flags be set on these nodes? + SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, + {Node->getValueType(0), MVT::Other}, + {Node->getOperand(0), HI}); + fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {Node->getValueType(0), MVT::Other}, + {fHI.getValue(1), fHI, TWOHW}); + SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, + {Node->getValueType(0), MVT::Other}, + {Node->getOperand(0), LO}); + + SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1), + fLO.getValue(1)); + + // Add the two halves + SDValue Result = + DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other}, + {TF, fHI, fLO}); + + Results.push_back(Result); + Results.push_back(Result.getValue(1)); + return; + } // Convert hi and lo to floats // Convert the hi part back to the upper values // TODO: Can any fast-math-flags be set on these nodes? - SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); - fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); - SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); + SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI); + fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW); + SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO); // Add the two halves - return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); + Results.push_back( + DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO)); } -SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { - if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { - SDLoc DL(Op); - SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); +SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { + if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) { + SDLoc DL(Node); + SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0)); // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. 
- return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), - Zero, Op.getOperand(0)); + return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero, + Node->getOperand(0)); } - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(Node); } -SDValue VectorLegalizer::ExpandFSUB(SDValue Op) { +void VectorLegalizer::ExpandFSUB(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal, // we can defer this to operation legalization where it will be lowered as // a+(-b). - EVT VT = Op.getValueType(); + EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && TLI.isOperationLegalOrCustom(ISD::FADD, VT)) - return Op; // Defer to LegalizeDAG - - return DAG.UnrollVectorOp(Op.getNode()); -} + return; // Defer to LegalizeDAG -SDValue VectorLegalizer::ExpandCTPOP(SDValue Op) { - SDValue Result; - if (TLI.expandCTPOP(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) { - SDValue Result; - if (TLI.expandCTLZ(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); + SDValue Tmp = DAG.UnrollVectorOp(Node); + Results.push_back(Tmp); } -SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) { - SDValue Result; - if (TLI.expandCTTZ(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandFunnelShift(SDValue Op) { - SDValue Result; - if (TLI.expandFunnelShift(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandROT(SDValue Op) { - SDValue Result; - if (TLI.expandROT(Op.getNode(), Result, DAG)) - return Result; - - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) { - if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Op.getNode(), DAG)) - return Expanded; - return DAG.UnrollVectorOp(Op.getNode()); -} - -SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) { +void VectorLegalizer::ExpandUADDSUBO(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDValue Result, Overflow; - TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG); - - if (Op.getResNo() == 0) { - AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); - return Result; - } else { - AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); - return Overflow; - } + TLI.expandUADDSUBO(Node, Result, Overflow, DAG); + Results.push_back(Result); + Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) { +void VectorLegalizer::ExpandSADDSUBO(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDValue Result, Overflow; - TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG); - - if (Op.getResNo() == 0) { - AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); - return Result; - } else { - AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); - return Overflow; - } + TLI.expandSADDSUBO(Node, Result, Overflow, DAG); + Results.push_back(Result); + Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandMULO(SDValue Op) { +void VectorLegalizer::ExpandMULO(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDValue Result, Overflow; - if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG)) - std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode()); + if (!TLI.expandMULO(Node, Result, Overflow, DAG)) + std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node); - if 
(Op.getResNo() == 0) { - AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow)); - return Result; - } else { - AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result)); - return Overflow; - } + Results.push_back(Result); + Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) { - if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG)) +SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) { + SDNode *N = Node; + if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N), + N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG)) return Expanded; - return DAG.UnrollVectorOp(Op.getNode()); + return DAG.UnrollVectorOp(N); } -SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) { - if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG)) - return Expanded; - return DAG.UnrollVectorOp(Op.getNode()); +void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) { + ExpandUINT_TO_FLOAT(Node, Results); + return; + } + if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) { + ExpandFP_TO_UINT(Node, Results); + return; + } + + UnrollStrictFPOp(Node, Results); } -SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { - EVT VT = Op.getValueType(); +void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + EVT VT = Node->getValueType(0); EVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); - unsigned NumOpers = Op.getNumOperands(); + unsigned NumOpers = Node->getNumOperands(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT ValueVTs[] = {EltVT, MVT::Other}; - SDValue Chain = Op.getOperand(0); - SDLoc dl(Op); + + EVT TmpEltVT = EltVT; + if (Node->getOpcode() == ISD::STRICT_FSETCC || + Node->getOpcode() == ISD::STRICT_FSETCCS) + TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TmpEltVT); + + EVT ValueVTs[] = {TmpEltVT, MVT::Other}; + SDValue Chain = Node->getOperand(0); + SDLoc dl(Node); SmallVector<SDValue, 32> OpValues; SmallVector<SDValue, 32> OpChains; @@ -1387,7 +1508,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { // Now process the remaining operands. for (unsigned j = 1; j < NumOpers; ++j) { - SDValue Oper = Op.getOperand(j); + SDValue Oper = Node->getOperand(j); EVT OperVT = Oper.getValueType(); if (OperVT.isVector()) @@ -1397,28 +1518,37 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { Opers.push_back(Oper); } - SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); + SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers); + SDValue ScalarResult = ScalarOp.getValue(0); + SDValue ScalarChain = ScalarOp.getValue(1); + + if (Node->getOpcode() == ISD::STRICT_FSETCC || + Node->getOpcode() == ISD::STRICT_FSETCCS) + ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult, + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), dl, EltVT), + DAG.getConstant(0, dl, EltVT)); - OpValues.push_back(ScalarOp.getValue(0)); - OpChains.push_back(ScalarOp.getValue(1)); + OpValues.push_back(ScalarResult); + OpChains.push_back(ScalarChain); } SDValue Result = DAG.getBuildVector(VT, dl, OpValues); SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); - AddLegalizedOperand(Op.getValue(0), Result); - AddLegalizedOperand(Op.getValue(1), NewChain); - - return Op.getResNo() ? 
NewChain : Result; + Results.push_back(Result); + Results.push_back(NewChain); } -SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { + EVT VT = Node->getValueType(0); unsigned NumElems = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue CC = Node->getOperand(2); EVT TmpEltVT = LHS.getValueType().getVectorElementType(); - SDLoc dl(Op); + SDLoc dl(Node); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode( diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 3763e886cef2..974914d00d05 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TypeSize.h" using namespace llvm; #define DEBUG_TYPE "legalize-types" @@ -50,7 +51,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; - case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; @@ -146,35 +146,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = ScalarizeVecRes_TernaryOp(N); break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - case ISD::STRICT_FP_EXTEND: + +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" R = ScalarizeVecRes_StrictFPOp(N); break; + case ISD::UADDO: case ISD::SADDO: case ISD::USUBO: @@ -187,7 +165,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIXSAT: case ISD::UMULFIX: case ISD::UMULFIXSAT: - R = ScalarizeVecRes_MULFIX(N); + case ISD::SDIVFIX: + case ISD::UDIVFIX: + R = ScalarizeVecRes_FIX(N); break; } @@ -211,7 +191,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { Op0.getValueType(), Op0, Op1, Op2); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = N->getOperand(2); 
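
The ISD::SDIVFIX and ISD::UDIVFIX nodes routed through the scalarize path above describe division of two fixed-point values that share a scale: to keep the quotient at scale f, the dividend is conceptually multiplied by 2^f before the integer division. A minimal host-side sketch of that arithmetic (plain C++, not the DAG expansion itself; the helper name sdivfix32 and the Q16.16 example values are invented for illustration):

#include <cstdint>
#include <cstdio>

// Scale-f signed fixed-point division: a and b each represent value / 2^f.
// Pre-shift the dividend in a wider type so the shift cannot overflow, then
// let integer division truncate toward zero.
static int32_t sdivfix32(int32_t a, int32_t b, unsigned f) {
  int64_t wide = static_cast<int64_t>(a) << f; // a * 2^f, exact in 64 bits
  return static_cast<int32_t>(wide / b);
}

int main() {
  const unsigned f = 16;                  // Q16.16
  int32_t a = (3 << 16) + (1 << 15);      // 3.5
  int32_t b = (1 << 16) + (1 << 14);      // 1.25
  std::printf("%f\n", sdivfix32(a, b, f) / 65536.0); // prints ~2.8
  return 0;
}
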
@@ -226,10 +206,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { EVT ValueVTs[] = {VT, MVT::Other}; SDLoc dl(N); - SmallVector<SDValue, 4> Opers; + SmallVector<SDValue, 4> Opers(NumOpers); // The Chain is the first operand. - Opers.push_back(Chain); + Opers[0] = Chain; // Now process the remaining operands. for (unsigned i = 1; i < NumOpers; ++i) { @@ -238,7 +218,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { if (Oper.getValueType().isVector()) Oper = GetScalarizedVector(Oper); - Opers.push_back(Oper); + Opers[i] = Oper; } SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers); @@ -326,18 +306,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { NewVT, Op, N->getOperand(1)); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) { - EVT NewVT = N->getValueType(0).getVectorElementType(); - SDValue Op = GetScalarizedVector(N->getOperand(1)); - SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), - { NewVT, MVT::Other }, - { N->getOperand(0), Op, N->getOperand(2) }); - // Legalize the chain result - switch anything that used the old chain to - // use the new one. - ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - return Res; -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(ISD::FPOWI, SDLoc(N), @@ -606,6 +574,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: Res = ScalarizeVecOp_UnaryOp_StrictFP(N); @@ -699,7 +669,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); // Revectorize the result so the types line up with what the uses of this // expression expect. - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + + // Do our own replacement and return SDValue() to tell the caller that we + // handled all replacements since caller can only handle a single result. + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); } /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. @@ -804,7 +779,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + + Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); + + // Do our own replacement and return SDValue() to tell the caller that we + // handled all replacements since caller can only handle a single result. 
+ ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); } SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) { @@ -901,13 +882,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FP_EXTEND: - case ISD::STRICT_FP_EXTEND: case ISD::FP_ROUND: - case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: - case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::STRICT_FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: @@ -964,32 +941,13 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMA: SplitVecRes_TernaryOp(N, Lo, Hi); break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: + +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" SplitVecRes_StrictFPOp(N, Lo, Hi); break; + case ISD::UADDO: case ISD::SADDO: case ISD::USUBO: @@ -1002,7 +960,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIXSAT: case ISD::UMULFIX: case ISD::UMULFIXSAT: - SplitVecRes_MULFIX(N, Lo, Hi); + case ISD::SDIVFIX: + case ISD::UDIVFIX: + SplitVecRes_FIX(N, Lo, Hi); break; } @@ -1041,7 +1001,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, Op0Hi, Op1Hi, Op2Hi); } -void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; @@ -1206,9 +1166,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueSizeInBits() / 8; - StackPtr = - DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); + StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -1304,12 +1262,12 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDLoc dl(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - SmallVector<SDValue, 4> OpsLo; - SmallVector<SDValue, 4> OpsHi; + SmallVector<SDValue, 4> OpsLo(NumOps); + SmallVector<SDValue, 4> OpsHi(NumOps); // The Chain is the first operand. - OpsLo.push_back(Chain); - OpsHi.push_back(Chain); + OpsLo[0] = Chain; + OpsHi[0] = Chain; // Now process the remaining operands. 
for (unsigned i = 1; i < NumOps; ++i) { @@ -1327,8 +1285,8 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i); } - OpsLo.push_back(OpLo); - OpsHi.push_back(OpHi); + OpsLo[i] = OpLo; + OpsHi[i] = OpHi; } EVT LoValueVTs[] = {LoVT, MVT::Other}; @@ -1572,12 +1530,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi) { + assert(MLD->isUnindexed() && "Indexed masked load during type legalization!"); EVT LoVT, HiVT; SDLoc dl(MLD); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); + SDValue Offset = MLD->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked load offset"); SDValue Mask = MLD->getMask(); SDValue PassThru = MLD->getPassThru(); unsigned Alignment = MLD->getOriginalAlignment(); @@ -1609,8 +1570,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, PassThruLo, LoMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); @@ -1621,8 +1583,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), MLD->getRanges()); - Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO, - ExtType, MLD->isExpandingLoad()); + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, + MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); // Build a factor node to remember that this load is independent of the // other one. @@ -1747,24 +1710,6 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); - } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) { - Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, - { N->getOperand(0), Lo, N->getOperand(2) }); - Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, - { N->getOperand(0), Hi, N->getOperand(2) }); - SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Lo.getValue(1), Hi.getValue(1)); - ReplaceValueWith(SDValue(N, 1), NewChain); - } else if (N->isStrictFPOpcode()) { - Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other }, - { N->getOperand(0), Lo }); - Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other }, - { N->getOperand(0), Hi }); - // Legalize the chain result - switch anything that used the old chain to - // use the new one. 
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Lo.getValue(1), Hi.getValue(1)); - ReplaceValueWith(SDValue(N, 1), NewChain); } else { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); @@ -2003,9 +1948,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - if (N->getValueType(0).bitsLT(N->getOperand(0).getValueType())) + if (N->getValueType(0).bitsLT( + N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType())) Res = SplitVecOp_TruncateHelper(N); else Res = SplitVecOp_UnaryOp(N); @@ -2357,8 +2305,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed masked store of vector?"); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); + SDValue Offset = N->getOffset(); + assert(Offset.isUndef() && "Unexpected indexed masked store offset"); SDValue Mask = N->getMask(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); @@ -2392,8 +2343,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - N->isTruncatingStore(), + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, @@ -2405,8 +2356,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - N->isTruncatingStore(), N->isCompressingStore()); + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); // Build a factor node to remember that this store is independent of the // other one. @@ -2562,7 +2514,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // // Without this transform, the original truncate would end up being // scalarized, which is pretty much always a last resort. - SDValue InVec = N->getOperand(0); + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; + SDValue InVec = N->getOperand(OpNo); EVT InVT = InVec->getValueType(0); EVT OutVT = N->getValueType(0); unsigned NumElements = OutVT.getVectorNumElements(); @@ -2606,8 +2559,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements/2); - SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); - SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); + + SDValue HalfLo; + SDValue HalfHi; + SDValue Chain; + if (N->isStrictFPOpcode()) { + HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other}, + {N->getOperand(0), HalfLo}); + HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other}, + {N->getOperand(0), HalfHi}); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1), + HalfHi.getValue(1)); + } else { + HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); + HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); + } // Concatenate them to get the full intermediate truncation result. EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, @@ -2616,6 +2584,17 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { // type. This should normally be something that ends up being legal directly, // but in theory if a target has very wide vectors and an annoyingly // restricted set of legal types, this split can chain to build things up. + + if (N->isStrictFPOpcode()) { + SDValue Res = DAG.getNode( + ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other}, + {Chain, InterVec, + DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))}); + // Relink the chain + ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1)); + return Res; + } + return IsFloat ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, DAG.getTargetConstant( @@ -2774,30 +2753,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryWithExtraScalarOp(N); break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: +#include "llvm/IR/ConstrainedOps.def" Res = WidenVecRes_StrictFP(N); break; @@ -2843,13 +2801,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; - case ISD::STRICT_FP_EXTEND: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_TO_SINT: - case ISD::STRICT_FP_TO_UINT: - Res = WidenVecRes_Convert_StrictFP(N); - break; - case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -3091,6 +3042,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { + switch (N->getOpcode()) { + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: + return WidenVecRes_STRICT_FSETCC(N); + case ISD::STRICT_FP_EXTEND: + case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + return WidenVecRes_Convert_StrictFP(N); + default: + break; + } + // StrictFP op widening for operations that can trap. unsigned NumOpers = N->getNumOperands(); unsigned Opcode = N->getOpcode(); @@ -3497,7 +3463,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: break; - case TargetLowering::TypePromoteInteger: + case TargetLowering::TypePromoteInteger: { // If the incoming type is a vector that is being promoted, then // we know that the elements are arranged differently and that we // must perform the conversion using a stack slot. 
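
The big-endian fix-up in the next hunk exists because a promoted integer keeps its value in the numerically low bits, while on a big-endian target the first lanes of the bitcast vector are read from the most significant bytes; shifting the promoted value up by the width difference moves the payload to where those lanes expect it. A small host-side sketch of the layout (plain C++; the bytes_be helper and the i16-promoted-to-i64 widths are chosen only for illustration):

#include <array>
#include <cstdint>
#include <cstdio>

// Big-endian byte image of a 64-bit integer: most significant byte first,
// i.e. the byte that would feed the first vector lane after a bitcast.
static std::array<uint8_t, 8> bytes_be(uint64_t v) {
  std::array<uint8_t, 8> b{};
  for (int i = 0; i < 8; ++i)
    b[i] = static_cast<uint8_t>(v >> (8 * (7 - i)));
  return b;
}

int main() {
  uint16_t in = 0xABCD;                     // original narrow value
  uint64_t promoted = in;                   // promoted: payload in low bits
  uint64_t shifted = promoted << (64 - 16); // shift by the width difference

  auto p = bytes_be(promoted), s = bytes_be(shifted);
  std::printf("unshifted: first bytes %02x %02x\n", p[0], p[1]); // 00 00
  std::printf("shifted  : first bytes %02x %02x\n", s[0], s[1]); // ab cd
  return 0;
}
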
@@ -3506,11 +3472,24 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { // If the InOp is promoted to the same size, convert it. Otherwise, // fall out of the switch and widen the promoted input. - InOp = GetPromotedInteger(InOp); - InVT = InOp.getValueType(); - if (WidenVT.bitsEq(InVT)) - return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); + SDValue NInOp = GetPromotedInteger(InOp); + EVT NInVT = NInOp.getValueType(); + if (WidenVT.bitsEq(NInVT)) { + // For big endian targets we need to shift the input integer or the + // interesting bits will end up at the wrong place. + if (DAG.getDataLayout().isBigEndian()) { + unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits(); + EVT ShiftAmtTy = TLI.getShiftAmountTy(NInVT, DAG.getDataLayout()); + assert(ShiftAmt < WidenVT.getSizeInBits() && "Too large shift amount!"); + NInOp = DAG.getNode(ISD::SHL, dl, NInVT, NInOp, + DAG.getConstant(ShiftAmt, dl, ShiftAmtTy)); + } + return DAG.getNode(ISD::BITCAST, dl, WidenVT, NInOp); + } + InOp = NInOp; + InVT = NInVT; break; + } case TargetLowering::TypeSoftenFloat: case TargetLowering::TypePromoteFloat: case TargetLowering::TypeExpandInteger: @@ -3748,10 +3727,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { WidenVT.getVectorNumElements()); Mask = ModifyToType(Mask, WideMaskVT, true); - SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), - Mask, PassThru, N->getMemoryVT(), - N->getMemOperand(), ExtType, - N->isExpandingLoad()); + SDValue Res = DAG.getMaskedLoad( + WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, + PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), + ExtType, N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -3798,6 +3777,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { WidenVT, N->getOperand(0)); } +// Return true is this is a SETCC node or a strict version of it. +static inline bool isSETCCOp(unsigned Opcode) { + switch (Opcode) { + case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: + return true; + } + return false; +} + // Return true if this is a node that could have two SETCCs as operands. static inline bool isLogicalMaskOp(unsigned Opcode) { switch (Opcode) { @@ -3809,6 +3799,13 @@ static inline bool isLogicalMaskOp(unsigned Opcode) { return false; } +// If N is a SETCC or a strict variant of it, return the type +// of the compare operands. +static inline EVT getSETCCOperandType(SDValue N) { + unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; + return N->getOperand(OpNo).getValueType(); +} + // This is used just for the assert in convertMask(). Check that this either // a SETCC or a previously handled SETCC by convertMask(). #ifndef NDEBUG @@ -3831,7 +3828,7 @@ static inline bool isSETCCorConvertedSETCC(SDValue N) { return isSETCCorConvertedSETCC(N.getOperand(0)) && isSETCCorConvertedSETCC(N.getOperand(1)); - return (N.getOpcode() == ISD::SETCC || + return (isSETCCOp(N.getOpcode()) || ISD::isBuildVectorOfConstantSDNodes(N.getNode())); } #endif @@ -3846,10 +3843,17 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument."); // Make a new Mask node, with a legal result VT. 
+ SDValue Mask; SmallVector<SDValue, 4> Ops; for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i) Ops.push_back(InMask->getOperand(i)); - SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); + if (InMask->isStrictFPOpcode()) { + Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), + { MaskVT, MVT::Other }, Ops); + ReplaceValueWith(InMask.getValue(1), Mask.getValue(1)); + } + else + Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops); // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign // extend or truncate is needed. @@ -3902,7 +3906,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { if (N->getOpcode() != ISD::VSELECT) return SDValue(); - if (Cond->getOpcode() != ISD::SETCC && !isLogicalMaskOp(Cond->getOpcode())) + if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode())) return SDValue(); // If this is a splitted VSELECT that was previously already handled, do @@ -3925,8 +3929,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { return SDValue(); // If there is support for an i1 vector mask, don't touch. - if (Cond.getOpcode() == ISD::SETCC) { - EVT SetCCOpVT = Cond->getOperand(0).getValueType(); + if (isSETCCOp(Cond.getOpcode())) { + EVT SetCCOpVT = getSETCCOperandType(Cond); while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal) SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT); EVT SetCCResVT = getSetCCResultType(SetCCOpVT); @@ -3957,17 +3961,17 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger(); SDValue Mask; - if (Cond->getOpcode() == ISD::SETCC) { - EVT MaskVT = getSetCCResultType(Cond.getOperand(0).getValueType()); + if (isSETCCOp(Cond->getOpcode())) { + EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond)); Mask = convertMask(Cond, MaskVT, ToMaskVT); } else if (isLogicalMaskOp(Cond->getOpcode()) && - Cond->getOperand(0).getOpcode() == ISD::SETCC && - Cond->getOperand(1).getOpcode() == ISD::SETCC) { + isSETCCOp(Cond->getOperand(0).getOpcode()) && + isSETCCOp(Cond->getOperand(1).getOpcode())) { // Cond is (AND/OR/XOR (SETCC, SETCC)) SDValue SETCC0 = Cond->getOperand(0); SDValue SETCC1 = Cond->getOperand(1); - EVT VT0 = getSetCCResultType(SETCC0.getOperand(0).getValueType()); - EVT VT1 = getSetCCResultType(SETCC1.getOperand(0).getValueType()); + EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0)); + EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1)); unsigned ScalarBits0 = VT0.getScalarSizeInBits(); unsigned ScalarBits1 = VT1.getScalarSizeInBits(); unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits(); @@ -4119,6 +4123,47 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { WidenVT, InOp1, InOp2, N->getOperand(2)); } +SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(1).getValueType().isVector() && + "Operands must be vectors"); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + + SDLoc dl(N); + SDValue Chain = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + SDValue CC = N->getOperand(3); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); + + // Fully unroll and reassemble. 
+ SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector<SDValue, 8> Chains(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + SDValue LHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue RHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, + {Chain, LHSElem, RHSElem, CC}); + Chains[i] = Scalars[i].getValue(1); + Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i], + DAG.getBoolConstant(true, dl, EltVT, VT), + DAG.getBoolConstant(false, dl, EltVT, VT)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + ReplaceValueWith(SDValue(N, 1), NewChain); + + return DAG.getBuildVector(WidenVT, dl, Scalars); +} //===----------------------------------------------------------------------===// // Widen Vector Operand @@ -4150,6 +4195,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break; case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; @@ -4161,12 +4208,16 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: + case ISD::FP_ROUND: + case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::STRICT_FP_TO_UINT: case ISD::SINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: case ISD::TRUNCATE: Res = WidenVecOp_Convert(N); break; @@ -4297,13 +4348,21 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) { SDValue Res; if (N->isStrictFPOpcode()) { - Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, - { N->getOperand(0), InOp }); + if (Opcode == ISD::STRICT_FP_ROUND) + Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, + { N->getOperand(0), InOp, N->getOperand(2) }); + else + Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, + { N->getOperand(0), InOp }); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - } else - Res = DAG.getNode(Opcode, dl, WideVT, InOp); + } else { + if (Opcode == ISD::FP_ROUND) + Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1)); + else + Res = DAG.getNode(Opcode, dl, WideVT, InOp); + } return DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, Res, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); @@ -4486,7 +4545,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { StVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(), - Mask, MST->getMemoryVT(), MST->getMemOperand(), + MST->getOffset(), Mask, MST->getMemoryVT(), + MST->getMemOperand(), MST->getAddressingMode(), false, MST->isCompressingStore()); } @@ -4580,6 +4640,44 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { return DAG.getNode(ExtendCode, dl, VT, CC); } +SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue LHS = GetWidenedVector(N->getOperand(1)); + SDValue RHS = GetWidenedVector(N->getOperand(2)); + SDValue CC = N->getOperand(3); + SDLoc dl(N); + + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + + // Unroll into a build vector. + SmallVector<SDValue, 8> Scalars(NumElts); + SmallVector<SDValue, 8> Chains(NumElts); + + for (unsigned i = 0; i != NumElts; ++i) { + SDValue LHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue RHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, + {Chain, LHSElem, RHSElem, CC}); + Chains[i] = Scalars[i].getValue(1); + Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i], + DAG.getBoolConstant(true, dl, EltVT, VT), + DAG.getBoolConstant(false, dl, EltVT, VT)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); + ReplaceValueWith(SDValue(N, 1), NewChain); + + return DAG.getBuildVector(VT, dl, Scalars); +} + SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { SDLoc dl(N); SDValue Op = GetWidenedVector(N->getOperand(0)); @@ -4670,7 +4768,8 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned Width, EVT WidenVT, unsigned Align = 0, unsigned WidenEx = 0) { EVT WidenEltVT = WidenVT.getVectorElementType(); - unsigned WidenWidth = WidenVT.getSizeInBits(); + const bool Scalable = WidenVT.isScalableVector(); + unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize(); unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); unsigned AlignInBits = Align*8; @@ -4681,23 +4780,27 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, // See if there is larger legal integer than the element type to load/store. 
unsigned VT; - for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; - VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { - EVT MemVT((MVT::SimpleValueType) VT); - unsigned MemVTWidth = MemVT.getSizeInBits(); - if (MemVT.getSizeInBits() <= WidenEltWidth) - break; - auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); - if ((Action == TargetLowering::TypeLegal || - Action == TargetLowering::TypePromoteInteger) && - (WidenWidth % MemVTWidth) == 0 && - isPowerOf2_32(WidenWidth / MemVTWidth) && - (MemVTWidth <= Width || - (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { - if (MemVTWidth == WidenWidth) - return MemVT; - RetVT = MemVT; - break; + // Don't bother looking for an integer type if the vector is scalable, skip + // to vector types. + if (!Scalable) { + for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; + VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { + EVT MemVT((MVT::SimpleValueType) VT); + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (MemVT.getSizeInBits() <= WidenEltWidth) + break; + auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); + if ((Action == TargetLowering::TypeLegal || + Action == TargetLowering::TypePromoteInteger) && + (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + if (MemVTWidth == WidenWidth) + return MemVT; + RetVT = MemVT; + break; + } } } @@ -4706,7 +4809,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; - unsigned MemVTWidth = MemVT.getSizeInBits(); + // Skip vector MVTs which don't match the scalable property of WidenVT. 
+ if (Scalable != MemVT.isScalableVector()) + continue; + unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinSize(); auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); if ((Action == TargetLowering::TypeLegal || Action == TargetLowering::TypePromoteInteger) && diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index d4c1fb36475e..0e4d783e3505 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -910,10 +910,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); - if (MDNode *MD = DAG->getHeapAllocSite(N)) { + if (MDNode *MD = DAG->getHeapAllocSite(N)) if (NewInsn && NewInsn->isCall()) - MF.addCodeViewHeapAllocSite(NewInsn, MD); - } + NewInsn->setHeapAllocMarker(MF, MD); GluedNodes.pop_back(); } @@ -923,9 +922,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) { if (NewInsn && NewInsn->isCall()) - MF.addCodeViewHeapAllocSite(NewInsn, MD); + NewInsn->setHeapAllocMarker(MF, MD); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 52a71b91d93f..313e07b5fdd6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -63,6 +66,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -352,9 +356,9 @@ ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { (OldG << 2)); // New L bit. } -ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { +static ISD::CondCode getSetCCInverseImpl(ISD::CondCode Op, bool isIntegerLike) { unsigned Operation = Op; - if (isInteger) + if (isIntegerLike) Operation ^= 7; // Flip L, G, E bits, but not U. else Operation ^= 15; // Flip all of the condition bits. @@ -365,6 +369,15 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { return ISD::CondCode(Operation); } +ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, EVT Type) { + return getSetCCInverseImpl(Op, Type.isInteger()); +} + +ISD::CondCode ISD::GlobalISel::getSetCCInverse(ISD::CondCode Op, + bool isIntegerLike) { + return getSetCCInverseImpl(Op, isIntegerLike); +} + /// For an integer comparison, return 1 if the comparison is a signed operation /// and 2 if the result is an unsigned comparison. Return zero if the operation /// does not depend on the sign of the input (setne and seteq). 
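The hunk above replaces the bool parameter of ISD::getSetCCInverse with the comparison operands' EVT, keeping a boolean form under ISD::GlobalISel for callers that only know "integer-like". A minimal caller-side sketch of the new SelectionDAG signature; the helper name below is hypothetical and not part of this patch:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Invert a comparison; the operand EVT lets the helper decide whether the
// condition code is integer-like (flip L/G/E only) or FP (flip U as well).
static SDValue emitInvertedSetCC(SelectionDAG &DAG, const SDLoc &DL, EVT ResVT,
                                 SDValue LHS, SDValue RHS, ISD::CondCode CC) {
  ISD::CondCode InvCC = ISD::getSetCCInverse(CC, LHS.getValueType());
  return DAG.getSetCC(DL, ResVT, LHS, RHS, InvCC);
}

// GlobalISel, which has no EVT at hand, would use the boolean variant instead:
//   ISD::GlobalISel::getSetCCInverse(CC, /*isIntegerLike=*/true);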
@@ -385,7 +398,8 @@ static int isSignedOp(ISD::CondCode Opcode) { } ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, - bool IsInteger) { + EVT Type) { + bool IsInteger = Type.isInteger(); if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) // Cannot fold a signed integer setcc with an unsigned integer setcc. return ISD::SETCC_INVALID; @@ -405,7 +419,8 @@ ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, } ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, - bool IsInteger) { + EVT Type) { + bool IsInteger = Type.isInteger(); if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) // Cannot fold a signed setcc with an unsigned setcc. return ISD::SETCC_INVALID; @@ -1005,7 +1020,9 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - LegacyDivergenceAnalysis * Divergence) { + LegacyDivergenceAnalysis * Divergence, + ProfileSummaryInfo *PSIin, + BlockFrequencyInfo *BFIin) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -1014,6 +1031,8 @@ void SelectionDAG::init(MachineFunction &NewMF, LibInfo = LibraryInfo; Context = &MF->getFunction().getContext(); DA = Divergence; + PSI = PSIin; + BFI = BFIin; } SelectionDAG::~SelectionDAG() { @@ -1023,6 +1042,11 @@ SelectionDAG::~SelectionDAG() { delete DbgInfo; } +bool SelectionDAG::shouldOptForSize() const { + return MF->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI); +} + void SelectionDAG::allnodes_clear() { assert(&*AllNodes.begin() == &EntryNode); AllNodes.remove(AllNodes.begin()); @@ -1101,6 +1125,20 @@ SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL)); } +std::pair<SDValue, SDValue> +SelectionDAG::getStrictFPExtendOrRound(SDValue Op, SDValue Chain, + const SDLoc &DL, EVT VT) { + assert(!VT.bitsEq(Op.getValueType()) && + "Strict no-op FP extend/round not allowed."); + SDValue Res = + VT.bitsGT(Op.getValueType()) + ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op}) + : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other}, + {Chain, Op, getIntPtrConstant(0, DL)}); + + return std::pair<SDValue, SDValue>(Res, SDValue(Res.getNode(), 1)); +} + SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : @@ -1279,7 +1317,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, } SDValue Result(N, 0); - if (VT.isVector()) + if (VT.isScalableVector()) + Result = getSplatVector(VT, DL, Result); + else if (VT.isVector()) Result = getSplatBuildVector(VT, DL, Result); return Result; @@ -1425,7 +1465,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = MF->getFunction().hasOptSize() + Alignment = shouldOptForSize() ? getDataLayout().getABITypeAlignment(C->getType()) : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; @@ -2379,9 +2419,10 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. -static const APInt *getValidShiftAmountConstant(SDValue V) { +static const APInt *getValidShiftAmountConstant(SDValue V, + const APInt &DemandedElts) { unsigned BitWidth = V.getScalarValueSizeInBits(); - if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { + if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) { // Shifting more than the bitwidth is not valid. const APInt &ShAmt = SA->getAPIntValue(); if (ShAmt.ult(BitWidth)) @@ -2392,13 +2433,16 @@ static const APInt *getValidShiftAmountConstant(SDValue V) { /// If a SHL/SRA/SRL node has constant vector shift amounts that are all less /// than the element bit-width of the shift node, return the minimum value. -static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { +static const APInt * +getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { unsigned BitWidth = V.getScalarValueSizeInBits(); auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); if (!BV) return nullptr; const APInt *MinShAmt = nullptr; for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); if (!SA) return nullptr; @@ -2413,6 +2457,32 @@ static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { return MinShAmt; } +/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less +/// than the element bit-width of the shift node, return the maximum value. +static const APInt * +getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); + if (!BV) + return nullptr; + const APInt *MaxShAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + if (!DemandedElts[i]) + continue; + auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); + if (!SA) + return nullptr; + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return nullptr; + if (MaxShAmt && MaxShAmt->uge(ShAmt)) + continue; + MaxShAmt = &ShAmt; + } + return MaxShAmt; +} + /// Determine which bits of Op are known to be either zero or one and return /// them in Known. For vectors, the known bits are those that are shared by /// every vector element. @@ -2784,37 +2854,60 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; // If we know the result of a setcc has the top bits zero, use this info. 
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) Known.Zero.setBitsFrom(1); break; + } case ISD::SHL: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { unsigned Shift = ShAmt->getZExtValue(); Known.Zero <<= Shift; Known.One <<= Shift; // Low bits are known zero. Known.Zero.setLowBits(Shift); + break; } + + // No matter the shift amount, the trailing zeros will stay zero. + Known.Zero = APInt::getLowBitsSet(BitWidth, Known.countMinTrailingZeros()); + Known.One.clearAllBits(); + + // Minimum shift low bits are known zero. + if (const APInt *ShMinAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Known.Zero.setLowBits(ShMinAmt->getZExtValue()); break; case ISD::SRL: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { unsigned Shift = ShAmt->getZExtValue(); Known.Zero.lshrInPlace(Shift); Known.One.lshrInPlace(Shift); // High bits are known zero. Known.Zero.setHighBits(Shift); - } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) { - // Minimum shift high bits are known zero. - Known.Zero.setHighBits(ShMinAmt->getZExtValue()); + break; } + + // No matter the shift amount, the leading zeros will stay zero. + Known.Zero = APInt::getHighBitsSet(BitWidth, Known.countMinLeadingZeros()); + Known.One.clearAllBits(); + + // Minimum shift high bits are known zero. + if (const APInt *ShMinAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); break; case ISD::SRA: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); unsigned Shift = ShAmt->getZExtValue(); // Sign extend known zero/one bit (else is unknown). @@ -3336,20 +3429,20 @@ SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, KnownBits N0Known = computeKnownBits(N0); bool overflow; - (void)(~N0Known.Zero).uadd_ov(~N1Known.Zero, overflow); + (void)N0Known.getMaxValue().uadd_ov(N1Known.getMaxValue(), overflow); if (!overflow) return OFK_Never; } // mulhi + 1 never overflow if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && - (~N1Known.Zero & 0x01) == ~N1Known.Zero) + (N1Known.getMaxValue() & 0x01) == N1Known.getMaxValue()) return OFK_Never; if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { KnownBits N0Known = computeKnownBits(N0); - if ((~N0Known.Zero & 0x01) == ~N0Known.Zero) + if ((N0Known.getMaxValue() & 0x01) == N0Known.getMaxValue()) return OFK_Never; } @@ -3550,25 +3643,26 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = VTBits - SrcVT.getScalarSizeInBits(); return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; } - case ISD::SRA: - Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); - // SRA X, C -> adds C sign bits. 
- if (ConstantSDNode *C = - isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { - APInt ShiftVal = C->getAPIntValue(); - ShiftVal += Tmp; - Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); - } + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + // SRA X, C -> adds C sign bits. + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) + Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); + else if (const APInt *ShAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) + Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); return Tmp; case ISD::SHL: - if (ConstantSDNode *C = - isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { - // shl destroys sign bits. - Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); - if (C->getAPIntValue().uge(VTBits) || // Bad shift. - C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out. - return Tmp - C->getZExtValue(); + if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { + // shl destroys sign bits, ensure it doesn't shift out all sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (ShAmt->ult(Tmp)) + return Tmp - ShAmt->getZExtValue(); + } else if (const APInt *ShAmt = + getValidMaximumShiftAmountConstant(Op, DemandedElts)) { + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (ShAmt->ult(Tmp)) + return Tmp - ShAmt->getZExtValue(); } break; case ISD::AND: @@ -3648,11 +3742,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return VTBits; break; case ISD::SETCC: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + unsigned OpNo = Op->isStrictFPOpcode() ? 1 : 0; // If setcc returns 0/-1, all bits are sign bits. 
- if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + if (TLI->getBooleanContents(Op.getOperand(OpNo).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; + } case ISD::ROTL: case ISD::ROTR: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { @@ -4648,11 +4746,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); - // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && - OpOpcode == ISD::FSUB) - return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), - Operand.getOperand(0), Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getOperand(0); break; @@ -4689,46 +4782,46 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; } -static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, - const APInt &C2) { +static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, + const APInt &C2) { switch (Opcode) { - case ISD::ADD: return std::make_pair(C1 + C2, true); - case ISD::SUB: return std::make_pair(C1 - C2, true); - case ISD::MUL: return std::make_pair(C1 * C2, true); - case ISD::AND: return std::make_pair(C1 & C2, true); - case ISD::OR: return std::make_pair(C1 | C2, true); - case ISD::XOR: return std::make_pair(C1 ^ C2, true); - case ISD::SHL: return std::make_pair(C1 << C2, true); - case ISD::SRL: return std::make_pair(C1.lshr(C2), true); - case ISD::SRA: return std::make_pair(C1.ashr(C2), true); - case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); - case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); - case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true); - case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); - case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); - case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true); - case ISD::SADDSAT: return std::make_pair(C1.sadd_sat(C2), true); - case ISD::UADDSAT: return std::make_pair(C1.uadd_sat(C2), true); - case ISD::SSUBSAT: return std::make_pair(C1.ssub_sat(C2), true); - case ISD::USUBSAT: return std::make_pair(C1.usub_sat(C2), true); + case ISD::ADD: return C1 + C2; + case ISD::SUB: return C1 - C2; + case ISD::MUL: return C1 * C2; + case ISD::AND: return C1 & C2; + case ISD::OR: return C1 | C2; + case ISD::XOR: return C1 ^ C2; + case ISD::SHL: return C1 << C2; + case ISD::SRL: return C1.lshr(C2); + case ISD::SRA: return C1.ashr(C2); + case ISD::ROTL: return C1.rotl(C2); + case ISD::ROTR: return C1.rotr(C2); + case ISD::SMIN: return C1.sle(C2) ? C1 : C2; + case ISD::SMAX: return C1.sge(C2) ? C1 : C2; + case ISD::UMIN: return C1.ule(C2) ? C1 : C2; + case ISD::UMAX: return C1.uge(C2) ? 
C1 : C2; + case ISD::SADDSAT: return C1.sadd_sat(C2); + case ISD::UADDSAT: return C1.uadd_sat(C2); + case ISD::SSUBSAT: return C1.ssub_sat(C2); + case ISD::USUBSAT: return C1.usub_sat(C2); case ISD::UDIV: if (!C2.getBoolValue()) break; - return std::make_pair(C1.udiv(C2), true); + return C1.udiv(C2); case ISD::UREM: if (!C2.getBoolValue()) break; - return std::make_pair(C1.urem(C2), true); + return C1.urem(C2); case ISD::SDIV: if (!C2.getBoolValue()) break; - return std::make_pair(C1.sdiv(C2), true); + return C1.sdiv(C2); case ISD::SREM: if (!C2.getBoolValue()) break; - return std::make_pair(C1.srem(C2), true); + return C1.srem(C2); } - return std::make_pair(APInt(1, 0), false); + return llvm::None; } SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, @@ -4736,12 +4829,10 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, const ConstantSDNode *C2) { if (C1->isOpaque() || C2->isOpaque()) return SDValue(); - - std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(), - C2->getAPIntValue()); - if (!Folded.second) - return SDValue(); - return getConstant(Folded.first, DL, VT); + if (Optional<APInt> Folded = + FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue())) + return getConstant(Folded.getValue(), DL, VT); + return SDValue(); } SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, @@ -5228,8 +5319,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ element type of the vector."); - // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. - if (N1.isUndef()) + // Extract from an undefined value or using an undefined index is undefined. + if (N1.isUndef() || N2.isUndef()) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF @@ -5506,6 +5597,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) return getUNDEF(VT); + + // Undefined index can be assumed out-of-bounds, so that's UNDEF too. + if (N3.isUndef()) + return getUNDEF(VT); + + // If the inserted element is an UNDEF, just use the input vector. + if (N2.isUndef()) + return N1; + break; } case ISD::INSERT_SUBVECTOR: { @@ -5697,10 +5797,19 @@ static SDValue getMemsetStringVal(EVT VT, const SDLoc &dl, SelectionDAG &DAG, return SDValue(nullptr, 0); } -SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, unsigned Offset, - const SDLoc &DL) { +SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, int64_t Offset, + const SDLoc &DL, + const SDNodeFlags Flags) { EVT VT = Base.getValueType(); - return getNode(ISD::ADD, DL, VT, Base, getConstant(Offset, DL, VT)); + return getMemBasePlusOffset(Base, getConstant(Offset, DL, VT), DL, Flags); +} + +SDValue SelectionDAG::getMemBasePlusOffset(SDValue Ptr, SDValue Offset, + const SDLoc &DL, + const SDNodeFlags Flags) { + assert(Offset.getValueType().isInteger()); + EVT BasePtrVT = Ptr.getValueType(); + return getNode(ISD::ADD, DL, BasePtrVT, Ptr, Offset, Flags); } /// Returns true if memcpy source is constant data. 
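The getMemBasePlusOffset change above adds an SDNodeFlags parameter and an SDValue-offset overload; later hunks in this patch (for example SelectionDAGBuilder::visitStore) switch to it. A small sketch of the constant-offset form; the helper name is hypothetical and the offset is assumed to stay within the underlying object:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Fold a known-in-bounds byte offset into a pointer, tagging the generated
// ISD::ADD as nuw so later combines know the address arithmetic cannot wrap.
static SDValue addPieceOffset(SelectionDAG &DAG, const SDLoc &dl, SDValue Ptr,
                              int64_t ByteOffset) {
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  return DAG.getMemBasePlusOffset(Ptr, ByteOffset, dl, Flags);
}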
@@ -5722,12 +5831,13 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { SrcDelta + G->getOffset()); } -static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { +static bool shouldLowerMemFuncForSize(const MachineFunction &MF, + SelectionDAG &DAG) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. if (MF.getTarget().getTargetTriple().isOSDarwin()) return MF.getFunction().hasMinSize(); - return MF.getFunction().hasOptSize(); + return DAG.shouldOptForSize(); } static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, @@ -5777,7 +5887,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -5960,7 +6070,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -6066,7 +6176,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); + bool OptSize = shouldLowerMemFuncForSize(MF, DAG); FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -6557,7 +6667,9 @@ SDValue SelectionDAG::getMemIntrinsicNode( if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); - if (!Size) + if (!Size && MemVT.isScalableVector()) + Size = MemoryLocation::UnknownSize; + else if (!Size) Size = MemVT.getStoreSize(); MachineFunction &MF = getMachineFunction(); @@ -6951,16 +7063,22 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, } SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, - SDValue Ptr, SDValue Mask, SDValue PassThru, - EVT MemVT, MachineMemOperand *MMO, + SDValue Base, SDValue Offset, SDValue Mask, + SDValue PassThru, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, ISD::LoadExtType ExtTy, bool isExpanding) { - SDVTList VTs = getVTList(VT, MVT::Other); - SDValue Ops[] = { Chain, Ptr, Mask, PassThru }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked load with an offset!"); + SDVTList VTs = Indexed ? 
getVTList(VT, Base.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Base, Offset, Mask, PassThru}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>( - dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO)); + dl.getIROrder(), VTs, AM, ExtTy, isExpanding, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -6968,7 +7086,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return SDValue(E, 0); } auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - ExtTy, isExpanding, MemVT, MMO); + AM, ExtTy, isExpanding, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -6978,27 +7096,45 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, return V; } +SDValue SelectionDAG::getIndexedMaskedLoad(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedLoadSDNode *LD = cast<MaskedLoadSDNode>(OrigLoad); + assert(LD->getOffset().isUndef() && "Masked load is already a indexed load!"); + return getMaskedLoad(OrigLoad.getValueType(), dl, LD->getChain(), Base, + Offset, LD->getMask(), LD->getPassThru(), + LD->getMemoryVT(), LD->getMemOperand(), AM, + LD->getExtensionType(), LD->isExpandingLoad()); +} + SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, - SDValue Val, SDValue Ptr, SDValue Mask, - EVT MemVT, MachineMemOperand *MMO, - bool IsTruncating, bool IsCompressing) { + SDValue Val, SDValue Base, SDValue Offset, + SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, + ISD::MemIndexedMode AM, bool IsTruncating, + bool IsCompressing) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - SDVTList VTs = getVTList(MVT::Other); - SDValue Ops[] = { Chain, Val, Ptr, Mask }; + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && + "Unindexed masked store with an offset!"); + SDVTList VTs = Indexed ? 
getVTList(Base.getValueType(), MVT::Other) + : getVTList(MVT::Other); + SDValue Ops[] = {Chain, Val, Base, Offset, Mask}; FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>( - dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO)); + dl.getIROrder(), VTs, AM, IsTruncating, IsCompressing, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, - IsTruncating, IsCompressing, MemVT, MMO); + auto *N = + newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + IsTruncating, IsCompressing, MemVT, MMO); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -7008,6 +7144,17 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, return V; } +SDValue SelectionDAG::getIndexedMaskedStore(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + MaskedStoreSDNode *ST = cast<MaskedStoreSDNode>(OrigStore); + assert(ST->getOffset().isUndef() && + "Masked store is already a indexed store!"); + return getMaskedStore(ST->getChain(), dl, ST->getValue(), Base, Offset, + ST->getMask(), ST->getMemoryVT(), ST->getMemOperand(), + AM, ST->isTruncatingStore(), ST->isCompressingStore()); +} + SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO, @@ -7263,8 +7410,40 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); -#if 0 switch (Opcode) { + case ISD::STRICT_FP_EXTEND: + assert(VTList.NumVTs == 2 && Ops.size() == 2 && + "Invalid STRICT_FP_EXTEND!"); + assert(VTList.VTs[0].isFloatingPoint() && + Ops[1].getValueType().isFloatingPoint() && "Invalid FP cast!"); + assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && + "STRICT_FP_EXTEND result type should be vector iff the operand " + "type is vector!"); + assert((!VTList.VTs[0].isVector() || + VTList.VTs[0].getVectorNumElements() == + Ops[1].getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) && + "Invalid fpext node, dst <= src!"); + break; + case ISD::STRICT_FP_ROUND: + assert(VTList.NumVTs == 2 && Ops.size() == 3 && "Invalid STRICT_FP_ROUND!"); + assert(VTList.VTs[0].isVector() == Ops[1].getValueType().isVector() && + "STRICT_FP_ROUND result type should be vector iff the operand " + "type is vector!"); + assert((!VTList.VTs[0].isVector() || + VTList.VTs[0].getVectorNumElements() == + Ops[1].getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(VTList.VTs[0].isFloatingPoint() && + Ops[1].getValueType().isFloatingPoint() && + VTList.VTs[0].bitsLT(Ops[1].getValueType()) && + isa<ConstantSDNode>(Ops[2]) && + (cast<ConstantSDNode>(Ops[2])->getZExtValue() == 0 || + cast<ConstantSDNode>(Ops[2])->getZExtValue() == 1) && + "Invalid STRICT_FP_ROUND!"); + break; +#if 0 // FIXME: figure out how to safely handle things like // int foo(int x) { return 1 << (x & 255); } // int bar() { return foo(256); } @@ -7283,8 +7462,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); 
} break; - } #endif + } // Memoize the node unless it returns a flag. SDNode *N; @@ -7740,38 +7919,11 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { switch (OrigOpc) { default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); - case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; - case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; - case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; - case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; - case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; - case ISD::STRICT_FMA: NewOpc = ISD::FMA; break; - case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break; - case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; - case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; - case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break; - case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break; - case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break; - case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break; - case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; - case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; - case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; - case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; - case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; - case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; - case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; - case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; - case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; - case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; - case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; - case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; - case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; - case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; - case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; - case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; - case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; - case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; - case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; +#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break; +#include "llvm/IR/ConstrainedOps.def" } assert(Node->getNumValues() == 2 && "Unexpected number of results!"); @@ -8051,9 +8203,9 @@ void SelectionDAG::transferDbgValues(SDValue From, SDValue To, Expr = *Fragment; } // Clone the SDDbgValue and move it to To. - SDDbgValue *Clone = - getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), - Dbg->getDebugLoc(), Dbg->getOrder()); + SDDbgValue *Clone = getDbgValue( + Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), Dbg->getDebugLoc(), + std::max(ToNode->getIROrder(), Dbg->getOrder())); ClonedDVs.push_back(Clone); if (InvalidateDbg) { @@ -8831,7 +8983,9 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, // We check here that the size of the memory operand fits within the size of // the MMO. This is because the MMO might indicate only a possible address // range instead of specifying the affected memory addresses precisely. - assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); + // TODO: Make MachineMemOperands aware of scalable vectors. + assert(memvt.getStoreSize().getKnownMinSize() <= MMO->getSize() && + "Size mismatch!"); } /// Profile - Gather unique data for the node. 
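For readers unfamiliar with the X-macro pattern that mutateStrictFPToFP adopts above, the #define/#include pair expands into one case label per entry of llvm/IR/ConstrainedOps.def. A rough, partial illustration of the generated code (not part of the patch; exact .def entries elided):

  // From an INSTRUCTION entry: a strict node maps to its non-strict twin.
  case ISD::STRICT_FADD:   NewOpc = ISD::FADD;  break;
  // From a CMP_INSTRUCTION entry: both strict compare flavors map to SETCC.
  case ISD::STRICT_FSETCC: NewOpc = ISD::SETCC; break;
  // ...one case per operation listed in ConstrainedOps.def, so newly added
  // strict opcodes pick up a mapping without editing this switch by hand.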
@@ -9245,11 +9399,11 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, /// it cannot be inferred. unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. - const GlobalValue *GV; + const GlobalValue *GV = nullptr; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); - KnownBits Known(IdxWidth); + unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); + KnownBits Known(PtrWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8c15563fcd23..728d963a916f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -27,11 +27,13 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" @@ -84,6 +86,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -722,7 +726,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, unsigned IntermediateNumElts = IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1; - // Convert the vector to the appropiate type if necessary. + // Convert the vector to the appropriate type if necessary. unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts; EVT BuiltVectorTy = EVT::getVectorVT( @@ -1021,6 +1025,8 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); + PendingConstrainedFP.clear(); + PendingConstrainedFPStrict.clear(); CurInst = nullptr; HasTailCall = false; SDNodeOrder = LowestSDNodeOrder; @@ -1031,50 +1037,66 @@ void SelectionDAGBuilder::clearDanglingDebugInfo() { DanglingDebugInfoMap.clear(); } -SDValue SelectionDAGBuilder::getRoot() { - if (PendingLoads.empty()) - return DAG.getRoot(); - - if (PendingLoads.size() == 1) { - SDValue Root = PendingLoads[0]; - DAG.setRoot(Root); - PendingLoads.clear(); - return Root; - } - - // Otherwise, we have to make a token factor node. - SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads); - PendingLoads.clear(); - DAG.setRoot(Root); - return Root; -} - -SDValue SelectionDAGBuilder::getControlRoot() { +// Update DAG root to include dependencies on Pending chains. +SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) { SDValue Root = DAG.getRoot(); - if (PendingExports.empty()) + if (Pending.empty()) return Root; - // Turn all of the CopyToReg chains into one factored node. + // Add current root to PendingChains, unless we already indirectly + // depend on it. 
if (Root.getOpcode() != ISD::EntryToken) { - unsigned i = 0, e = PendingExports.size(); + unsigned i = 0, e = Pending.size(); for (; i != e; ++i) { - assert(PendingExports[i].getNode()->getNumOperands() > 1); - if (PendingExports[i].getNode()->getOperand(0) == Root) + assert(Pending[i].getNode()->getNumOperands() > 1); + if (Pending[i].getNode()->getOperand(0) == Root) break; // Don't add the root if we already indirectly depend on it. } if (i == e) - PendingExports.push_back(Root); + Pending.push_back(Root); } - Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - PendingExports); - PendingExports.clear(); + if (Pending.size() == 1) + Root = Pending[0]; + else + Root = DAG.getTokenFactor(getCurSDLoc(), Pending); + DAG.setRoot(Root); + Pending.clear(); return Root; } +SDValue SelectionDAGBuilder::getMemoryRoot() { + return updateRoot(PendingLoads); +} + +SDValue SelectionDAGBuilder::getRoot() { + // Chain up all pending constrained intrinsics together with all + // pending loads, by simply appending them to PendingLoads and + // then calling getMemoryRoot(). + PendingLoads.reserve(PendingLoads.size() + + PendingConstrainedFP.size() + + PendingConstrainedFPStrict.size()); + PendingLoads.append(PendingConstrainedFP.begin(), + PendingConstrainedFP.end()); + PendingLoads.append(PendingConstrainedFPStrict.begin(), + PendingConstrainedFPStrict.end()); + PendingConstrainedFP.clear(); + PendingConstrainedFPStrict.clear(); + return getMemoryRoot(); +} + +SDValue SelectionDAGBuilder::getControlRoot() { + // We need to emit pending fpexcept.strict constrained intrinsics, + // so append them to the PendingExports list. + PendingExports.append(PendingConstrainedFPStrict.begin(), + PendingConstrainedFPStrict.end()); + PendingConstrainedFPStrict.clear(); + return updateRoot(PendingExports); +} + void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. if (I.isTerminator()) { @@ -1104,6 +1126,15 @@ void SelectionDAGBuilder::visit(const Instruction &I) { Node->intersectFlagsWith(IncomingFlags); } } + // Constrained FP intrinsics with fpexcept.ignore should also get + // the NoFPExcept flag. + if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I)) + if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore) + if (SDNode *Node = getNodeForIRValue(&I)) { + SDNodeFlags Flags = Node->getFlags(); + Flags.setNoFPExcept(true); + Node->setFlags(Flags); + } if (!I.isTerminator() && !HasTailCall && !isStatepoint(&I)) // statepoints handle their exports internally @@ -2746,8 +2777,9 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. 
- assert(!I.hasOperandBundlesOtherThan( - {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, + LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget}) && "Cannot lower invokes with arbitrary operand bundles yet!"); const Value *Callee(I.getCalledValue()); @@ -3033,7 +3065,7 @@ static bool isVectorReductionOp(const User *I) { if (!Visited.insert(User).second) continue; - for (const auto &U : User->users()) { + for (const auto *U : User->users()) { auto Inst = dyn_cast<Instruction>(U); if (!Inst) return false; @@ -3119,6 +3151,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { if (isVectorReductionOp(&I)) { Flags.setVectorReduction(true); LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); + + // If no flags are set we will propagate the incoming flags, if any flags + // are set, we will intersect them with the incoming flag and so we need to + // copy the FMF flags here. + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) { + Flags.copyFMF(*FPOp); + } } SDValue Op1 = getValue(I.getOperand(0)); @@ -4039,9 +4078,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Root; bool ConstantMemory = false; - if (isVolatile || NumValues > MaxParallelChains) + if (isVolatile) // Serialize volatile loads with other side effects. Root = getRoot(); + else if (NumValues > MaxParallelChains) + Root = getMemoryRoot(); else if (AA && AA->pointsToConstantMemory(MemoryLocation( SV, @@ -4216,10 +4257,9 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Src = getValue(SrcV); SDValue Ptr = getValue(PtrV); - SDValue Root = getRoot(); + SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot(); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); - EVT PtrVT = Ptr.getValueType(); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4245,8 +4285,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { Root = Chain; ChainI = 0; } - SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), Flags); + SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags); SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); @@ -4292,6 +4331,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4303,11 +4343,14 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOStore, VT.getStoreSize(), + MachineMemOperand::MOStore, + // TODO: Make MachineMemOperands aware of scalable + // vectors. 
+ VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo); - SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, - MMO, false /* Truncating */, - IsCompressing); + SDValue StoreNode = + DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, + ISD::UNINDEXED, false /* Truncating */, IsCompressing); DAG.setRoot(StoreNode); setValue(&I, StoreNode); } @@ -4346,9 +4389,10 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, unsigned FinalIndex = GEP->getNumOperands() - 1; Value *IndexVal = GEP->getOperand(FinalIndex); + gep_type_iterator GTI = gep_type_begin(*GEP); // Ensure all the other indices are 0. - for (unsigned i = 1; i < FinalIndex; ++i) { + for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) { auto *C = dyn_cast<Constant>(GEP->getOperand(i)); if (!C) return false; @@ -4361,18 +4405,39 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, // The operands of the GEP may be defined in another basic block. // In this case we'll not find nodes for the operands. - if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) + if (!SDB->findValue(Ptr)) + return false; + Constant *C = dyn_cast<Constant>(IndexVal); + if (!C && !SDB->findValue(IndexVal)) return false; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); - Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + StructType *STy = GTI.getStructTypeOrNull(); + + if (STy) { + const StructLayout *SL = DL.getStructLayout(STy); + if (isa<VectorType>(C->getType())) { + C = C->getSplatValue(); + // FIXME: If getSplatValue may return nullptr for a structure? + // If not, the following check can be removed. + if (!C) + return false; + } + auto *CI = cast<ConstantInt>(C); + Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + Index = DAG.getConstant(SL->getElementOffset(CI->getZExtValue()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + } else { + Scale = DAG.getTargetConstant( + DL.getTypeAllocSize(GEP->getResultElementType()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + Index = SDB->getValue(IndexVal); + } Base = SDB->getValue(Ptr); - Index = SDB->getValue(IndexVal); IndexType = ISD::SIGNED_SCALED; - if (!Index.getValueType().isVector()) { + if (STy || !Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); @@ -4383,7 +4448,7 @@ static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) + // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask) const Value *Ptr = I.getArgOperand(1); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); @@ -4407,7 +4472,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), - MachineMemOperand::MOStore, VT.getStoreSize(), + MachineMemOperand::MOStore, + // TODO: Make MachineMemOperands aware of scalable + // vectors. 
+ VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); @@ -4415,7 +4483,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } - SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; + SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, Ops, MMO, IndexType); DAG.setRoot(Scatter); @@ -4452,6 +4520,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(Src0Operand); SDValue Mask = getValue(MaskOperand); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT VT = Src0.getValueType(); if (!Alignment) @@ -4462,22 +4531,29 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. - bool AddToChain = - !AA || !AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, - LocationSize::precise( - DAG.getDataLayout().getTypeStoreSize(I.getType())), - AAInfo)); + MemoryLocation ML; + if (VT.isScalableVector()) + ML = MemoryLocation(PtrOperand); + else + ML = MemoryLocation(PtrOperand, LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(I.getType())), + AAInfo); + bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOLoad, VT.getStoreSize(), + MachineMemOperand::MOLoad, + // TODO: Make MachineMemOperands aware of scalable + // vectors. + VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo, Ranges); - SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, - ISD::NON_EXTLOAD, IsExpanding); + SDValue Load = + DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, + ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding); if (AddToChain) PendingLoads.push_back(Load.getValue(1)); setValue(&I, Load); @@ -4524,7 +4600,10 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), - MachineMemOperand::MOLoad, VT.getStoreSize(), + MachineMemOperand::MOLoad, + // TODO: Make MachineMemOperands aware of scalable + // vectors. 
+ VT.getStoreSize().getKnownMinSize(), Alignment, AAInfo, Ranges); if (!UniformBase) { @@ -4634,10 +4713,10 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, - TLI.getFenceOperandTy(DAG.getDataLayout())); - Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, - TLI.getFenceOperandTy(DAG.getDataLayout())); + Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); + Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl, + TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -5344,8 +5423,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - const Function &F = DAG.getMachineFunction().getFunction(); - if (!F.hasOptSize() || + bool OptForSize = DAG.shouldOptForSize(); + if (!OptForSize || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { @@ -5382,6 +5461,60 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } +static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, + SDValue LHS, SDValue RHS, SDValue Scale, + SelectionDAG &DAG, const TargetLowering &TLI) { + EVT VT = LHS.getValueType(); + bool Signed = Opcode == ISD::SDIVFIX; + LLVMContext &Ctx = *DAG.getContext(); + + // If the type is legal but the operation isn't, this node might survive all + // the way to operation legalization. If we end up there and we do not have + // the ability to widen the type (if VT*2 is not legal), we cannot expand the + // node. + + // Coax the legalizer into expanding the node during type legalization instead + // by bumping the size by one bit. This will force it to Promote, enabling the + // early expansion and avoiding the need to expand later. + + // We don't have to do this if Scale is 0; that can always be expanded. + + // FIXME: We wouldn't have to do this (or any of the early + // expansion/promotion) if it was possible to expand a libcall of an + // illegal type during operation legalization. But it's not, so things + // get a bit hacky. + unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue(); + if (ScaleInt > 0 && + (TLI.isTypeLegal(VT) || + (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) { + TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction( + Opcode, VT, ScaleInt); + if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) { + EVT PromVT; + if (VT.isScalarInteger()) + PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1); + else if (VT.isVector()) { + PromVT = VT.getVectorElementType(); + PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1); + PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount()); + } else + llvm_unreachable("Wrong VT for DIVFIX?"); + if (Signed) { + LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT); + RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT); + } else { + LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT); + RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT); + } + // TODO: Saturation. 
+ SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale); + return DAG.getZExtOrTrunc(Res, DL, VT); + } + } + + return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale); +} + // getUnderlyingArgRegs - Find underlying registers used for a truncated, // bitcasted, or split argument. Returns a list of <Register, size in bits> static void @@ -5474,7 +5607,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( // is an argument. But since we already has used %a1 to describe a parameter // we should not handle that last dbg.value here (that would result in an // incorrect hoisting of the DBG_VALUE to the function entry). - // Notice that we allow one dbg.value per IR level argument, to accomodate + // Notice that we allow one dbg.value per IR level argument, to accommodate // for the situation with fragments above. if (VariableIsFunctionInputArg) { unsigned ArgNo = Arg->getArgNo(); @@ -5489,7 +5622,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. int FI = FuncInfo.getArgumentFrameIndex(Arg); @@ -5511,7 +5643,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (Reg) { Op = MachineOperand::CreateReg(Reg, false); - IsIndirect = IsDbgDeclare; } } @@ -5530,15 +5661,38 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) { unsigned Offset = 0; for (auto RegAndSize : SplitRegs) { + // If the expression is already a fragment, the current register + // offset+size might extend beyond the fragment. In this case, only + // the register bits that are inside the fragment are relevant. + int RegFragmentSizeInBits = RegAndSize.second; + if (auto ExprFragmentInfo = Expr->getFragmentInfo()) { + uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits; + // The register is entirely outside the expression fragment, + // so is irrelevant for debug info. + if (Offset >= ExprFragmentSizeInBits) + break; + // The register is partially outside the expression fragment, only + // the low bits within the fragment are relevant for debug info. + if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) { + RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset; + } + } + auto FragmentExpr = DIExpression::createFragmentExpression( - Expr, Offset, RegAndSize.second); - if (!FragmentExpr) + Expr, Offset, RegFragmentSizeInBits); + Offset += RegAndSize.second; + // If a valid fragment expression cannot be created, the variable's + // correct value cannot be determined and so it is set as Undef. + if (!FragmentExpr) { + SDDbgValue *SDV = DAG.getConstantDbgValue( + Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); continue; + } assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, RegAndSize.first, Variable, *FragmentExpr)); - Offset += RegAndSize.second; } }; @@ -5555,7 +5709,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } Op = MachineOperand::CreateReg(VMI->second, false); - IsIndirect = IsDbgDeclare; } else if (ArgRegsAndSizes.size() > 1) { // This was split due to the calling convention, and no virtual register // mapping exists for the value. 
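[Editorial aside, not part of the patch] The fragment clipping added to EmitFuncArgumentDbgValue in the hunk above clamps each split register's contribution against a pre-existing DIExpression fragment. A minimal, self-contained sketch of that arithmetic follows; the 48-bit fragment and two 32-bit registers are made-up values for illustration, and LLVM types are deliberately elided.

#include <cstdio>

// Standalone sketch of the per-register clamping performed above.
// Values are hypothetical; DIExpression/register plumbing is omitted.
int main() {
  unsigned ExprFragmentSizeInBits = 48; // pre-existing fragment of the variable
  unsigned RegSizes[] = {32, 32};       // value split into two 32-bit registers
  unsigned Offset = 0;
  for (unsigned RegSize : RegSizes) {
    unsigned RegFragmentSizeInBits = RegSize;
    if (Offset >= ExprFragmentSizeInBits)
      break; // register lies entirely outside the fragment: nothing to describe
    if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits)
      RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset; // keep only the low bits
    std::printf("register at bit %u contributes %u bits\n", Offset,
                RegFragmentSizeInBits); // prints 32, then 16
    Offset += RegSize;
  }
  return 0;
}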
@@ -5569,9 +5722,26 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - IsIndirect = (Op->isReg()) ? IsIndirect : true; - if (IsIndirect) + + // If the argument arrives in a stack slot, then what the IR thought was a + // normal Value is actually in memory, and we must add a deref to load it. + if (Op->isFI()) { + int FI = Op->getIndex(); + unsigned Size = DAG.getMachineFunction().getFrameInfo().getObjectSize(FI); + if (Expr->isImplicit()) { + SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size}; + Expr = DIExpression::prependOpcodes(Expr, Ops); + } else { + Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); + } + } + + // If this location was specified with a dbg.declare, then it and its + // expression calculate the address of the variable. Append a deref to + // force it to be a memory location. + if (IsDbgDeclare) Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, *Op, Variable, Expr)); @@ -5603,20 +5773,20 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, /*IsIndirect*/ false, dl, DbgSDNodeOrder); } -// VisualStudio defines setjmp as _setjmp -#if defined(_MSC_VER) && defined(setjmp) && \ - !defined(setjmp_undefined_for_msvc) -# pragma push_macro("setjmp") -# undef setjmp -# define setjmp_undefined_for_msvc -#endif - static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) { switch (Intrinsic) { case Intrinsic::smul_fix: return ISD::SMULFIX; case Intrinsic::umul_fix: return ISD::UMULFIX; + case Intrinsic::smul_fix_sat: + return ISD::SMULFIXSAT; + case Intrinsic::umul_fix_sat: + return ISD::UMULFIXSAT; + case Intrinsic::sdiv_fix: + return ISD::SDIVFIX; + case Intrinsic::udiv_fix: + return ISD::UDIVFIX; default: llvm_unreachable("Unhandled fixed point intrinsic"); } @@ -5687,12 +5857,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, RegName, getValue(RegValue))); return; } - case Intrinsic::setjmp: - lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]); - return; - case Intrinsic::longjmp: - lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]); - return; case Intrinsic::memcpy: { const auto &MCI = cast<MemCpyInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); @@ -5706,7 +5870,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. - SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol, false, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); @@ -5722,7 +5887,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1); bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); - SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); + SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); updateDAGForMaybeTailCall(MS); return; @@ -5740,7 +5906,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memmove DAG // node. - SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MM); @@ -6102,44 +6269,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return; - case Intrinsic::experimental_constrained_fadd: - case Intrinsic::experimental_constrained_fsub: - case Intrinsic::experimental_constrained_fmul: - case Intrinsic::experimental_constrained_fdiv: - case Intrinsic::experimental_constrained_frem: - case Intrinsic::experimental_constrained_fma: - case Intrinsic::experimental_constrained_fptosi: - case Intrinsic::experimental_constrained_fptoui: - case Intrinsic::experimental_constrained_fptrunc: - case Intrinsic::experimental_constrained_fpext: - case Intrinsic::experimental_constrained_sqrt: - case Intrinsic::experimental_constrained_pow: - case Intrinsic::experimental_constrained_powi: - case Intrinsic::experimental_constrained_sin: - case Intrinsic::experimental_constrained_cos: - case Intrinsic::experimental_constrained_exp: - case Intrinsic::experimental_constrained_exp2: - case Intrinsic::experimental_constrained_log: - case Intrinsic::experimental_constrained_log10: - case Intrinsic::experimental_constrained_log2: - case Intrinsic::experimental_constrained_lrint: - case Intrinsic::experimental_constrained_llrint: - case Intrinsic::experimental_constrained_rint: - case Intrinsic::experimental_constrained_nearbyint: - case Intrinsic::experimental_constrained_maxnum: - case Intrinsic::experimental_constrained_minnum: - case Intrinsic::experimental_constrained_ceil: - case Intrinsic::experimental_constrained_floor: - case Intrinsic::experimental_constrained_lround: - case Intrinsic::experimental_constrained_llround: - case Intrinsic::experimental_constrained_round: - case Intrinsic::experimental_constrained_trunc: +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case Intrinsic::INTRINSIC: +#include "llvm/IR/ConstrainedOps.def" visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); return; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isFMAFasterThanFMulAndFAdd(VT)) { + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -6307,7 +6445,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } case Intrinsic::smul_fix: - case Intrinsic::umul_fix: { + case Intrinsic::umul_fix: + case Intrinsic::smul_fix_sat: + case Intrinsic::umul_fix_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -6315,20 +6455,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op1.getValueType(), Op1, 
Op2, Op3)); return; } - case Intrinsic::smul_fix_sat: { + case Intrinsic::sdiv_fix: + case Intrinsic::udiv_fix: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, - Op3)); - return; - } - case Intrinsic::umul_fix_sat: { - SDValue Op1 = getValue(I.getArgOperand(0)); - SDValue Op2 = getValue(I.getArgOperand(1)); - SDValue Op3 = getValue(I.getArgOperand(2)); - setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, - Op3)); + setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl, + Op1, Op2, Op3, DAG, TLI)); return; } case Intrinsic::stacksave: { @@ -6681,7 +6814,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Add the offset to the FP. Value *FP = I.getArgOperand(1); SDValue FPVal = getValue(FP); - SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); + SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl); setValue(&I, Add); return; @@ -6876,142 +7009,82 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, void SelectionDAGBuilder::visitConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); + ValueVTs.push_back(MVT::Other); // Out chain + + // We do not need to serialize constrained FP intrinsics against + // each other or against (nonvolatile) loads, so they can be + // chained like loads. + SDValue Chain = DAG.getRoot(); + SmallVector<SDValue, 4> Opers; + Opers.push_back(Chain); + if (FPI.isUnaryOp()) { + Opers.push_back(getValue(FPI.getArgOperand(0))); + } else if (FPI.isTernaryOp()) { + Opers.push_back(getValue(FPI.getArgOperand(0))); + Opers.push_back(getValue(FPI.getArgOperand(1))); + Opers.push_back(getValue(FPI.getArgOperand(2))); + } else { + Opers.push_back(getValue(FPI.getArgOperand(0))); + Opers.push_back(getValue(FPI.getArgOperand(1))); + } + unsigned Opcode; switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
- case Intrinsic::experimental_constrained_fadd: - Opcode = ISD::STRICT_FADD; - break; - case Intrinsic::experimental_constrained_fsub: - Opcode = ISD::STRICT_FSUB; - break; - case Intrinsic::experimental_constrained_fmul: - Opcode = ISD::STRICT_FMUL; - break; - case Intrinsic::experimental_constrained_fdiv: - Opcode = ISD::STRICT_FDIV; - break; - case Intrinsic::experimental_constrained_frem: - Opcode = ISD::STRICT_FREM; - break; - case Intrinsic::experimental_constrained_fma: - Opcode = ISD::STRICT_FMA; - break; - case Intrinsic::experimental_constrained_fptosi: - Opcode = ISD::STRICT_FP_TO_SINT; - break; - case Intrinsic::experimental_constrained_fptoui: - Opcode = ISD::STRICT_FP_TO_UINT; - break; - case Intrinsic::experimental_constrained_fptrunc: - Opcode = ISD::STRICT_FP_ROUND; - break; - case Intrinsic::experimental_constrained_fpext: - Opcode = ISD::STRICT_FP_EXTEND; - break; - case Intrinsic::experimental_constrained_sqrt: - Opcode = ISD::STRICT_FSQRT; - break; - case Intrinsic::experimental_constrained_pow: - Opcode = ISD::STRICT_FPOW; - break; - case Intrinsic::experimental_constrained_powi: - Opcode = ISD::STRICT_FPOWI; - break; - case Intrinsic::experimental_constrained_sin: - Opcode = ISD::STRICT_FSIN; +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case Intrinsic::INTRINSIC: \ + Opcode = ISD::STRICT_##DAGN; \ break; - case Intrinsic::experimental_constrained_cos: - Opcode = ISD::STRICT_FCOS; - break; - case Intrinsic::experimental_constrained_exp: - Opcode = ISD::STRICT_FEXP; - break; - case Intrinsic::experimental_constrained_exp2: - Opcode = ISD::STRICT_FEXP2; - break; - case Intrinsic::experimental_constrained_log: - Opcode = ISD::STRICT_FLOG; - break; - case Intrinsic::experimental_constrained_log10: - Opcode = ISD::STRICT_FLOG10; - break; - case Intrinsic::experimental_constrained_log2: - Opcode = ISD::STRICT_FLOG2; - break; - case Intrinsic::experimental_constrained_lrint: - Opcode = ISD::STRICT_LRINT; - break; - case Intrinsic::experimental_constrained_llrint: - Opcode = ISD::STRICT_LLRINT; - break; - case Intrinsic::experimental_constrained_rint: - Opcode = ISD::STRICT_FRINT; - break; - case Intrinsic::experimental_constrained_nearbyint: - Opcode = ISD::STRICT_FNEARBYINT; - break; - case Intrinsic::experimental_constrained_maxnum: - Opcode = ISD::STRICT_FMAXNUM; - break; - case Intrinsic::experimental_constrained_minnum: - Opcode = ISD::STRICT_FMINNUM; - break; - case Intrinsic::experimental_constrained_ceil: - Opcode = ISD::STRICT_FCEIL; - break; - case Intrinsic::experimental_constrained_floor: - Opcode = ISD::STRICT_FFLOOR; - break; - case Intrinsic::experimental_constrained_lround: - Opcode = ISD::STRICT_LROUND; - break; - case Intrinsic::experimental_constrained_llround: - Opcode = ISD::STRICT_LLROUND; - break; - case Intrinsic::experimental_constrained_round: - Opcode = ISD::STRICT_FROUND; +#include "llvm/IR/ConstrainedOps.def" + } + + // A few strict DAG nodes carry additional operands that are not + // set up by the default code above. 
+ switch (Opcode) { + default: break; + case ISD::STRICT_FP_ROUND: + Opers.push_back( + DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); break; - case Intrinsic::experimental_constrained_trunc: - Opcode = ISD::STRICT_FTRUNC; + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI); + Opers.push_back(DAG.getCondCode(getFCmpCondCode(FPCmp->getPredicate()))); break; } - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue Chain = getRoot(); - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); - ValueVTs.push_back(MVT::Other); // Out chain + } SDVTList VTs = DAG.getVTList(ValueVTs); - SDValue Result; - if (Opcode == ISD::STRICT_FP_ROUND) - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - DAG.getTargetConstant(0, sdl, - TLI.getPointerTy(DAG.getDataLayout())) }); - else if (FPI.isUnaryOp()) - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)) }); - else if (FPI.isTernaryOp()) - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - getValue(FPI.getArgOperand(1)), - getValue(FPI.getArgOperand(2)) }); - else - Result = DAG.getNode(Opcode, sdl, VTs, - { Chain, getValue(FPI.getArgOperand(0)), - getValue(FPI.getArgOperand(1)) }); - - if (FPI.getExceptionBehavior() != - ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) { - SDNodeFlags Flags; - Flags.setFPExcept(true); - Result->setFlags(Flags); - } + SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers); assert(Result.getNode()->getNumValues() == 2); + + // Push node to the appropriate list so that future instructions can be + // chained up correctly. SDValue OutChain = Result.getValue(1); - DAG.setRoot(OutChain); + switch (FPI.getExceptionBehavior().getValue()) { + case fp::ExceptionBehavior::ebIgnore: + // The only reason why ebIgnore nodes still need to be chained is that + // they might depend on the current rounding mode, and therefore must + // not be moved across instruction that may change that mode. + LLVM_FALLTHROUGH; + case fp::ExceptionBehavior::ebMayTrap: + // These must not be moved across calls or instructions that may change + // floating-point exception masks. + PendingConstrainedFP.push_back(OutChain); + break; + case fp::ExceptionBehavior::ebStrict: + // These must not be moved across calls or instructions that may change + // floating-point exception masks or read floating-point exception flags. + // In addition, they cannot be optimized out even if unused. + PendingConstrainedFPStrict.push_back(OutChain); + break; + } + SDValue FPResult = Result.getValue(0); setValue(&FPI, FPResult); } @@ -7102,13 +7175,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // We can't tail call inside a function with a swifterror argument. Lowering - // does not support this yet. It would have to move into the swifterror - // register before the call. - auto *Caller = CS.getInstruction()->getParent()->getParent(); - if (TLI.supportSwiftError() && - Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) - isTailCall = false; + if (isTailCall) { + // Avoid emitting tail calls in functions with the disable-tail-calls + // attribute. 
+ auto *Caller = CS.getInstruction()->getParent()->getParent(); + if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == + "true") + isTailCall = false; + + // We can't tail call inside a function with a swifterror argument. Lowering + // does not support this yet. It would have to move into the swifterror + // register before the call. + if (TLI.supportSwiftError() && + Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + isTailCall = false; + } for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { @@ -7142,6 +7223,18 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, isTailCall = false; } + // If call site has a cfguardtarget operand bundle, create and add an + // additional ArgListEntry. + if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_cfguardtarget)) { + TargetLowering::ArgListEntry Entry; + Value *V = Bundle->Inputs[0]; + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; + Entry.Ty = V->getType(); + Entry.IsCFGuardTarget = true; + Args.push_back(Entry); + } + // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) @@ -7374,7 +7467,8 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { // In the mempcpy context we need to pass in a false value for isTailCall // because the return pointer needs to be adjusted by the size of // the copied memory. - SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol, + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Align, isVol, false, /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); @@ -7683,8 +7777,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. - assert(!I.hasOperandBundlesOtherThan( - {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && + // CFGuardTarget bundles are lowered in LowerCallTo. 
+ assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, + LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget}) && "Cannot lower calls with arbitrary operand bundles!"); SDValue Callee = getValue(I.getCalledValue()); @@ -8182,10 +8278,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.Type) { case InlineAsm::isOutput: - if (OpInfo.ConstraintType == TargetLowering::C_Memory || - ((OpInfo.ConstraintType == TargetLowering::C_Immediate || - OpInfo.ConstraintType == TargetLowering::C_Other) && - OpInfo.isIndirect)) { + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); assert(ConstraintID != InlineAsm::Constraint_Unknown && @@ -8197,12 +8290,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); - break; - } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate || - OpInfo.ConstraintType == TargetLowering::C_Other) && - !OpInfo.isIndirect) || - OpInfo.ConstraintType == TargetLowering::C_Register || - OpInfo.ConstraintType == TargetLowering::C_RegisterClass) { + } else { // Otherwise, this outputs to a register (directly for C_Register / // C_RegisterClass, and a target-defined fashion for // C_Immediate/C_Other). Find a register that we can use. @@ -8285,8 +8373,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. - if ((OpInfo.ConstraintType == TargetLowering::C_Immediate || - OpInfo.ConstraintType == TargetLowering::C_Other) && + if (OpInfo.ConstraintType == TargetLowering::C_Other && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; @@ -8339,8 +8426,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || - OpInfo.ConstraintType == TargetLowering::C_Register || - OpInfo.ConstraintType == TargetLowering::C_Immediate) && + OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); // TODO: Support this. @@ -8678,7 +8764,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { Callee = getValue(CI.getCalledValue()); NullPtr = DAG.getIntPtrConstant(0, DL, true); - // The stackmap intrinsic only records the live variables (the arguemnts + // The stackmap intrinsic only records the live variables (the arguments // passed to it) and emits NOPS (if requested). Unlike the patchpoint // intrinsic, this won't be lowered to a function call. This means we don't // have to worry about calling conventions and target specific lowering code. 
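[Editorial aside, not part of the patch] The "llvm/IR/ConstrainedOps.def" include used in the hunks above is an X-macro table: each user defines INSTRUCTION(...) and includes the file to stamp out one case per constrained intrinsic. The sketch below shows the pattern with an invented two-entry table standing in for the real .def file; the enum and names are illustrative only.

#include <cstdio>

// Invented two-entry stand-in for ConstrainedOps.def (illustration only).
#define DEMO_CONSTRAINED_OPS(INSTRUCTION)                                     \
  INSTRUCTION(FAdd, 2, 1, experimental_constrained_fadd, FADD)                \
  INSTRUCTION(FSub, 2, 1, experimental_constrained_fsub, FSUB)

enum class Intrinsic {
  experimental_constrained_fadd,
  experimental_constrained_fsub,
};

// Same shape as the switch in visitConstrainedFPIntrinsic: one case per table
// row, mapping the intrinsic to the name of its strict DAG node.
const char *strictNodeName(Intrinsic IID) {
  switch (IID) {
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)                  \
  case Intrinsic::INTRINSIC:                                                  \
    return "STRICT_" #DAGN;
  DEMO_CONSTRAINED_OPS(INSTRUCTION)
#undef INSTRUCTION
  }
  return "unknown";
}

int main() {
  std::printf("%s\n", strictNodeName(Intrinsic::experimental_constrained_fadd));
  return 0; // prints STRICT_FADD
}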
@@ -9027,6 +9113,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsReturned = false; Entry.IsSwiftSelf = false; Entry.IsSwiftError = false; + Entry.IsCFGuardTarget = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; @@ -9139,6 +9226,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setSwiftSelf(); if (Args[i].IsSwiftError) Flags.setSwiftError(); + if (Args[i].IsCFGuardTarget) + Flags.setCFGuardTarget(); if (Args[i].IsByVal) Flags.setByVal(); if (Args[i].IsInAlloca) { @@ -9214,9 +9303,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 + // For scalable vectors the scalable part is currently handled + // by individual targets, so we just use the known minimum size here. ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, - i < CLI.NumFixedArgs, - i, j*Parts[j].getValueType().getStoreSize()); + i < CLI.NumFixedArgs, i, + j*Parts[j].getValueType().getStoreSize().getKnownMinSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { @@ -9487,7 +9578,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, /// Try to elide argument copies from memory into a local alloca. Succeeds if /// ArgVal is a load from a suitable fixed stack object. static void tryToElideArgumentCopy( - FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains, + FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains, DenseMap<int, int> &ArgCopyElisionFrameIndexMap, SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs, ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg, @@ -9507,9 +9598,9 @@ static void tryToElideArgumentCopy( assert(ArgCopyIter != ArgCopyElisionCandidates.end()); const AllocaInst *AI = ArgCopyIter->second.first; int FixedIndex = FINode->getIndex(); - int &AllocaIndex = FuncInfo->StaticAllocaMap[AI]; + int &AllocaIndex = FuncInfo.StaticAllocaMap[AI]; int OldIndex = AllocaIndex; - MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo(); + MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { LLVM_DEBUG( dbgs() << " argument copy elision failed due to bad fixed stack " @@ -9518,7 +9609,7 @@ static void tryToElideArgumentCopy( } unsigned RequiredAlignment = AI->getAlignment(); if (!RequiredAlignment) { - RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment( + RequiredAlignment = FuncInfo.MF->getDataLayout().getABITypeAlignment( AI->getAllocatedType()); } if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { @@ -9584,7 +9675,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // flag to ask the target to give us the memory location of that argument if // available. ArgCopyElisionMapTy ArgCopyElisionCandidates; - findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates); + findArgumentCopyElisionCandidates(DL, FuncInfo.get(), + ArgCopyElisionCandidates); // Set up the incoming argument description vector. 
for (const Argument &Arg : F.args()) { @@ -9685,8 +9777,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned NumRegs = TLI->getNumRegistersForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); for (unsigned i = 0; i != NumRegs; ++i) { + // For scalable vectors, use the minimum size; individual targets + // are responsible for handling scalable vector arguments and + // return values. ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, - ArgNo, PartBase+i*RegisterVT.getStoreSize()); + ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -9699,7 +9794,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (NeedsRegBlock && Value == NumValues - 1) Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); - PartBase += VT.getStoreSize(); + PartBase += VT.getStoreSize().getKnownMinSize(); } } @@ -9769,7 +9864,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Elide the copying store if the target loaded this argument from a // suitable fixed stack object. if (Ins[i].Flags.isCopyElisionCandidate()) { - tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap, + tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap, ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg, InVals[i], ArgHasUses); } @@ -9795,7 +9890,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned NumParts = TLI->getNumRegistersForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); - // Even an apparant 'unused' swifterror argument needs to be returned. So + // Even an apparent 'unused' swifterror argument needs to be returned. So // we do generate a copy for it that can be used on return from the // function. if (ArgHasUses || isSwiftErrorArg) { @@ -10508,7 +10603,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { return; } - SL->findJumpTables(Clusters, &SI, DefaultMBB); + SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI()); SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ @@ -10557,3 +10652,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); } } + +void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) { + SDValue N = getValue(I.getOperand(0)); + setValue(&I, N); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index bfcf30b430b6..18e0edf7fc04 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -143,6 +143,20 @@ private: /// tokenfactor for them just before terminator instructions. SmallVector<SDValue, 8> PendingExports; + /// Similar to loads, nodes corresponding to constrained FP intrinsics are + /// bunched up and emitted when necessary. These can be moved across each + /// other and any (normal) memory operation (load or store), but not across + /// calls or instructions having unspecified side effects. As a special + /// case, constrained FP intrinsics using fpexcept.strict may not be deleted + /// even if otherwise unused, so they need to be chained before any + /// terminator instruction (like PendingExports). We track the latter + /// set of nodes in a separate list. 
+ SmallVector<SDValue, 8> PendingConstrainedFP; + SmallVector<SDValue, 8> PendingConstrainedFPStrict; + + /// Update root to include all chains from the Pending list. + SDValue updateRoot(SmallVectorImpl<SDValue> &Pending); + /// A unique monotonically increasing number used to order the SDNodes we /// create. unsigned SDNodeOrder; @@ -447,12 +461,18 @@ public: /// Return the current virtual root of the Selection DAG, flushing any /// PendingLoad items. This must be done before emitting a store or any other - /// node that may need to be ordered after any prior load instructions. + /// memory node that may need to be ordered after any prior load instructions. + SDValue getMemoryRoot(); + + /// Similar to getMemoryRoot, but also flushes PendingConstrainedFP(Strict) + /// items. This must be done before emitting any call other any other node + /// that may need to be ordered after FP instructions due to other side + /// effects. SDValue getRoot(); /// Similar to getRoot, but instead of flushing all the PendingLoad items, - /// flush all the PendingExports items. It is necessary to do this before - /// emitting a terminator instruction. + /// flush all the PendingExports (and PendingConstrainedFPStrict) items. + /// It is necessary to do this before emitting a terminator instruction. SDValue getControlRoot(); SDLoc getCurSDLoc() const { @@ -742,6 +762,7 @@ private: void visitAtomicStore(const StoreInst &I); void visitLoadFromSwiftError(const LoadInst &I); void visitStoreToSwiftError(const StoreInst &I); + void visitFreeze(const FreezeInst &I); void visitInlineAsm(ImmutableCallSite CS); void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index bc10f7621239..6fd71393bf38 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -186,7 +186,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMINNUM_IEEE: return "fminnum_ieee"; case ISD::FMAXNUM_IEEE: return "fmaxnum_ieee"; case ISD::FMINIMUM: return "fminimum"; + case ISD::STRICT_FMINIMUM: return "strict_fminimum"; case ISD::FMAXIMUM: return "fmaximum"; + case ISD::STRICT_FMAXIMUM: return "strict_fmaximum"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::STRICT_FSQRT: return "strict_fsqrt"; @@ -270,6 +272,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FPOWI: return "strict_fpowi"; case ISD::SETCC: return "setcc"; case ISD::SETCCCARRY: return "setcccarry"; + case ISD::STRICT_FSETCC: return "strict_fsetcc"; + case ISD::STRICT_FSETCCS: return "strict_fsetccs"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; @@ -308,6 +312,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UMULFIX: return "umulfix"; case ISD::UMULFIXSAT: return "umulfixsat"; + case ISD::SDIVFIX: return "sdivfix"; + case ISD::UDIVFIX: return "udivfix"; + // Conversion operators. 
case ISD::SIGN_EXTEND: return "sign_extend"; case ISD::ZERO_EXTEND: return "zero_extend"; @@ -324,7 +331,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::STRICT_SINT_TO_FP: return "strict_sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::STRICT_UINT_TO_FP: return "strict_uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; @@ -541,6 +550,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasVectorReduction()) OS << " vector-reduction"; + if (getFlags().hasNoFPExcept()) + OS << " nofpexcept"; + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { if (!MN->memoperands_empty()) { OS << "<"; @@ -685,6 +697,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (doExt) OS << " from " << MLd->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MLd->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MLd->isExpandingLoad()) OS << ", expanding"; @@ -696,6 +712,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (MSt->isTruncatingStore()) OS << ", trunc to " << MSt->getMemoryVT().getEVTString(); + const char *AM = getIndexedModeName(MSt->getAddressingMode()); + if (*AM) + OS << ", " << AM; + if (MSt->isCompressingStore()) OS << ", compressing"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1f07a241a824..6c57c72d47a7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,8 +27,10 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" @@ -71,10 +73,12 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" @@ -147,17 +151,17 @@ static cl::opt<bool> ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, cl::desc("Pop up a window to show dags before legalize types")); static cl::opt<bool> -ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, - cl::desc("Pop up a window to show dags before legalize")); + ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post " + "legalize types dag combine pass")); +static cl::opt<bool> + ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); static cl::opt<bool> ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, cl::desc("Pop up a window to show dags before the second " "dag combine pass")); static cl::opt<bool> -ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, - cl::desc("Pop up a window to 
show dags before the post legalize types" - " dag combine pass")); -static cl::opt<bool> ViewISelDAGs("view-isel-dags", cl::Hidden, cl::desc("Pop up a window to show isel dags as they are selected")); static cl::opt<bool> @@ -167,12 +171,10 @@ static cl::opt<bool> ViewSUnitDAGs("view-sunit-dags", cl::Hidden, cl::desc("Pop up a window to show SUnit dags after they are processed")); #else -static const bool ViewDAGCombine1 = false, - ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, - ViewDAGCombine2 = false, - ViewDAGCombineLT = false, - ViewISelDAGs = false, ViewSchedDAGs = false, - ViewSUnitDAGs = false; +static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false, + ViewDAGCombineLT = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, ViewISelDAGs = false, + ViewSchedDAGs = false, ViewSUnitDAGs = false; #endif //===---------------------------------------------------------------------===// @@ -305,28 +307,22 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, - CodeGenOpt::Level OL) : - MachineFunctionPass(ID), TM(tm), - FuncInfo(new FunctionLoweringInfo()), - SwiftError(new SwiftErrorValueTracking()), - CurDAG(new SelectionDAG(tm, OL)), - SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)), - AA(), GFI(), - OptLevel(OL), - DAGSize(0) { - initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); - initializeBranchProbabilityInfoWrapperPassPass( - *PassRegistry::getPassRegistry()); - initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); - initializeTargetLibraryInfoWrapperPassPass( - *PassRegistry::getPassRegistry()); - } +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) + : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()), + SwiftError(new SwiftErrorValueTracking()), + CurDAG(new SelectionDAG(tm, OL)), + SDB(std::make_unique<SelectionDAGBuilder>(*CurDAG, *FuncInfo, *SwiftError, + OL)), + AA(), GFI(), OptLevel(OL), DAGSize(0) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} SelectionDAGISel::~SelectionDAGISel() { - delete SDB; delete CurDAG; - delete FuncInfo; delete SwiftError; } @@ -340,6 +336,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetTransformInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -442,13 +440,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? 
+ &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : + nullptr; LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); CurDAG->init(*MF, *ORE, this, LibInfo, - getAnalysisIfAvailable<LegacyDivergenceAnalysis>()); + getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI); FuncInfo->set(Fn, *MF, CurDAG); SwiftError->setFunction(*MF); @@ -735,23 +737,20 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, } void SelectionDAGISel::ComputeLiveOutVRegInfo() { - SmallPtrSet<SDNode*, 16> VisitedNodes; + SmallPtrSet<SDNode *, 16> Added; SmallVector<SDNode*, 128> Worklist; Worklist.push_back(CurDAG->getRoot().getNode()); + Added.insert(CurDAG->getRoot().getNode()); KnownBits Known; do { SDNode *N = Worklist.pop_back_val(); - // If we've already seen this node, ignore it. - if (!VisitedNodes.insert(N).second) - continue; - // Otherwise, add all chain operands to the worklist. for (const SDValue &Op : N->op_values()) - if (Op.getValueType() == MVT::Other) + if (Op.getValueType() == MVT::Other && Added.insert(Op.getNode()).second) Worklist.push_back(Op.getNode()); // If this is a CopyToReg with a vreg dest, process it. @@ -793,8 +792,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { FuncInfo->MBB->getBasicBlock()->getName()); #endif #ifdef NDEBUG - if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || - ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewDAGCombineLT || + ViewLegalizeDAGs || ViewDAGCombine2 || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) #endif { @@ -1159,10 +1158,30 @@ void SelectionDAGISel::DoInstructionSelection() { // we convert them to normal FP opcodes instead at this point. This // will allow them to be handled by existing target-specific instruction // selectors. - if (Node->isStrictFPOpcode() && - (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0)) - != TargetLowering::Legal)) - Node = CurDAG->mutateStrictFPToFP(Node); + if (!TLI->isStrictFPEnabled() && Node->isStrictFPOpcode()) { + // For some opcodes, we need to call TLI->getOperationAction using + // the first operand type instead of the result type. Note that this + // must match what SelectionDAGLegalize::LegalizeOp is doing. + EVT ActionVT; + switch (Node->getOpcode()) { + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: + ActionVT = Node->getOperand(1).getValueType(); + break; + default: + ActionVT = Node->getValueType(0); + break; + } + if (TLI->getOperationAction(Node->getOpcode(), ActionVT) + == TargetLowering::Expand) + Node = CurDAG->mutateStrictFPToFP(Node); + } LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: "; Node->dump(CurDAG)); @@ -1280,20 +1299,20 @@ bool SelectionDAGISel::PrepareEHLandingPad() { /// side-effect free and is either dead or folded into a generated instruction. /// Return false if it needs to be emitted. static bool isFoldedOrDeadInstruction(const Instruction *I, - FunctionLoweringInfo *FuncInfo) { + const FunctionLoweringInfo &FuncInfo) { return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. !I->isTerminator() && // Terminators aren't folded. - !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. - !I->isEHPad() && // EH pad instructions aren't folded. 
- !FuncInfo->isExportedInst(I); // Exported instrs must be computed. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !I->isEHPad() && // EH pad instructions aren't folded. + !FuncInfo.isExportedInst(I); // Exported instrs must be computed. } /// Collect llvm.dbg.declare information. This is done after argument lowering /// in case the declarations refer to arguments. -static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { - MachineFunction *MF = FuncInfo->MF; +static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { + MachineFunction *MF = FuncInfo.MF; const DataLayout &DL = MF->getDataLayout(); - for (const BasicBlock &BB : *FuncInfo->Fn) { + for (const BasicBlock &BB : *FuncInfo.Fn) { for (const Instruction &I : BB) { const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I); if (!DI) @@ -1315,11 +1334,11 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { // intrinsic and handle this during isel like dbg.value. int FI = std::numeric_limits<int>::max(); if (const auto *AI = dyn_cast<AllocaInst>(Address)) { - auto SI = FuncInfo->StaticAllocaMap.find(AI); - if (SI != FuncInfo->StaticAllocaMap.end()) + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) FI = SI->second; } else if (const auto *Arg = dyn_cast<Argument>(Address)) - FI = FuncInfo->getArgumentFrameIndex(Arg); + FI = FuncInfo.getArgumentFrameIndex(Arg); if (FI == std::numeric_limits<int>::max()) continue; @@ -1353,7 +1372,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()]; FuncInfo->InsertPt = FuncInfo->MBB->begin(); - CurDAG->setFunctionLoweringInfo(FuncInfo); + CurDAG->setFunctionLoweringInfo(FuncInfo.get()); if (!FastIS) { LowerArguments(Fn); @@ -1393,7 +1412,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (FastIS && Inserted) FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); - processDbgDeclares(FuncInfo); + processDbgDeclares(*FuncInfo); // Iterate over all basic blocks in the function. StackProtector &SP = getAnalysis<StackProtector>(); @@ -1453,7 +1472,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *Inst = &*std::prev(BI); // If we no longer require this instruction, skip it. - if (isFoldedOrDeadInstruction(Inst, FuncInfo) || + if (isFoldedOrDeadInstruction(Inst, *FuncInfo) || ElidedArgCopyInstrs.count(Inst)) { --NumFastIselRemaining; continue; @@ -1473,7 +1492,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *BeforeInst = Inst; while (BeforeInst != &*Begin) { BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); - if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + if (!isFoldedOrDeadInstruction(BeforeInst, *FuncInfo)) break; } if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && @@ -1589,7 +1608,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // But if FastISel was run, we already selected some of the block. // If we emitted a tail-call, we need to delete any previously emitted // instruction that follows it. 
- if (HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end()) + if (FastIS && HadTailCall && FuncInfo->InsertPt != FuncInfo->MBB->end()) FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end()); } @@ -2230,10 +2249,13 @@ void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) { void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); - MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); - const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0)); + + EVT VT = Op->getValueType(0); + LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT(); Register Reg = - TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), + TLI->getRegisterByName(RegStr->getString().data(), Ty, CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); @@ -2244,10 +2266,13 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); - MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); - const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - Register Reg = TLI->getRegisterByName(RegStr->getString().data(), - Op->getOperand(2).getValueType(), + MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0)); + + EVT VT = Op->getOperand(2).getValueType(); + LLT Ty = VT.isSimple() ? getLLTForMVT(VT.getSimpleVT()) : LLT(); + + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); @@ -3176,13 +3201,19 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case OPC_CheckFoldableChainNode: { assert(NodeStack.size() != 1 && "No parent node"); // Verify that all intermediate nodes between the root and this one have - // a single use. + // a single use (ignoring chains, which are handled in UpdateChains). bool HasMultipleUses = false; - for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) - if (!NodeStack[i].getNode()->hasOneUse()) { - HasMultipleUses = true; - break; - } + for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) { + unsigned NNonChainUses = 0; + SDNode *NS = NodeStack[i].getNode(); + for (auto UI = NS->use_begin(), UE = NS->use_end(); UI != UE; ++UI) + if (UI.getUse().getValueType() != MVT::Other) + if (++NNonChainUses > 1) { + HasMultipleUses = true; + break; + } + if (HasMultipleUses) break; + } if (HasMultipleUses) break; // Check to see that the target thinks this is profitable to fold and that @@ -3433,6 +3464,17 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); + // Check whether any matched node could raise an FP exception. Since all + // such nodes must have a chain, it suffices to check ChainNodesMatched. + // We need to perform this check before potentially modifying one of the + // nodes via MorphNode. + bool MayRaiseFPException = false; + for (auto *N : ChainNodesMatched) + if (mayRaiseFPException(N) && !N->getFlags().hasNoFPExcept()) { + MayRaiseFPException = true; + break; + } + // Create the node. 
MachineSDNode *Res = nullptr; bool IsMorphNodeTo = Opcode == OPC_MorphNodeTo || @@ -3464,6 +3506,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, Ops, EmitNodeInfo)); } + // Set the NoFPExcept flag when no original matched node could + // raise an FP exception, but the new node potentially might. + if (!MayRaiseFPException && mayRaiseFPException(Res)) { + SDNodeFlags Flags = Res->getFlags(); + Flags.setNoFPExcept(true); + Res->setFlags(Flags); + } + // If the node had chain/glue results, update our notion of the current // chain and glue. if (EmitNodeInfo & OPFL_GlueOutput) { @@ -3619,6 +3669,21 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, } } +/// Return whether the node may raise an FP exception. +bool SelectionDAGISel::mayRaiseFPException(SDNode *N) const { + // For machine opcodes, consult the MCID flag. + if (N->isMachineOpcode()) { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + return MCID.mayRaiseFPException(); + } + + // For ISD opcodes, only StrictFP opcodes may raise an FP + // exception. + if (N->isTargetOpcode()) + return N->isTargetStrictFPOpcode(); + return N->isStrictFPOpcode(); +} + bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index fad98b6f50dc..c628f379e415 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -384,7 +384,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // can consider allowing spills of smaller values to larger slots // (i.e. change the '==' in the assert below to a '>='). MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && + assert((MFI.getObjectSize(Index) * 8) == + (int64_t)Incoming.getValueSizeInBits() && "Bad spill: stack slot does not match!"); // Note: Using the alignment of the spill slot (rather than the abi or diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9ab1324533f1..24ab65171a17 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -52,6 +52,10 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const { const Function &F = DAG.getMachineFunction().getFunction(); + // First, check if tail calls have been disabled in this function. + if (F.getFnAttribute("disable-tail-calls").getValueAsString() == "true") + return false; + // Conservatively require the attributes of the call to match those of // the return. Ignore NoAlias and NonNull because they don't affect the // call sequence. 
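[Editorial aside, not part of the patch] The "disable-tail-calls" check added to isInTailCallPosition above, and the matching one in LowerCallTo earlier, gate tail-call emission on a string function attribute. A rough sketch of that gating, using a plain map as a stand-in for LLVM's attribute list (FakeFunction and its members are invented names, not LLVM API):

#include <cstdio>
#include <map>
#include <string>

// Stand-in for an LLVM Function carrying string attributes. The real check is
// F.getFnAttribute("disable-tail-calls").getValueAsString() == "true".
struct FakeFunction {
  std::map<std::string, std::string> Attrs;
  std::string getFnAttributeValue(const std::string &Key) const {
    auto It = Attrs.find(Key);
    return It == Attrs.end() ? std::string() : It->second;
  }
};

bool isInTailCallPosition(const FakeFunction &F /*, call node, chain, ... */) {
  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttributeValue("disable-tail-calls") == "true")
    return false;
  // ... the remaining attribute/ABI checks from the real function go here.
  return true;
}

int main() {
  FakeFunction F;
  F.Attrs["disable-tail-calls"] = "true";
  std::printf("tail call allowed: %d\n", isInTailCallPosition(F)); // prints 0
  return 0;
}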
@@ -122,7 +126,11 @@ std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, MakeLibCallOptions CallOptions, - const SDLoc &dl) const { + const SDLoc &dl, + SDValue InChain) const { + if (!InChain) + InChain = DAG.getEntryNode(); + TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); @@ -158,7 +166,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, } CLI.setDebugLoc(dl) - .setChain(DAG.getEntryNode()) + .setChain(InChain) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setNoReturn(CallOptions.DoesNotReturn) .setDiscardResult(!CallOptions.IsReturnValueUsed) @@ -277,6 +285,22 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, ISD::CondCode &CCCode, const SDLoc &dl, const SDValue OldLHS, const SDValue OldRHS) const { + SDValue Chain; + return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS, + OldRHS, Chain); +} + +void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, + SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, + const SDLoc &dl, const SDValue OldLHS, + const SDValue OldRHS, + SDValue &Chain, + bool IsSignaling) const { + // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc + // not supporting it. We can update this code when libgcc provides such + // functions. + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); @@ -320,25 +344,18 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, (VT == MVT::f64) ? RTLIB::OGT_F64 : (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; break; + case ISD::SETO: + ShouldInvertCC = true; + LLVM_FALLTHROUGH; case ISD::SETUO: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128; break; - case ISD::SETO: - LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : - (VT == MVT::f64) ? RTLIB::O_F64 : - (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128; - break; case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : - (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128; - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : - (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128; - break; + // SETONE = O && UNE + ShouldInvertCC = true; + LLVM_FALLTHROUGH; case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? 
RTLIB::UO_F64 : @@ -382,24 +399,33 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, EVT OpsVT[2] = { OldLHS.getValueType(), OldRHS.getValueType() }; CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first; + auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain); + NewLHS = Call.first; NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); - if (ShouldInvertCC) - CCCode = getSetCCInverse(CCCode, /*isInteger=*/true); - - if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode( - ISD::SETCC, dl, - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), - NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first; - NewLHS = DAG.getNode( - ISD::SETCC, dl, - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), - NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); - NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); + if (ShouldInvertCC) { + assert(RetVT.isInteger()); + CCCode = getSetCCInverse(CCCode, RetVT); + } + + if (LC2 == RTLIB::UNKNOWN_LIBCALL) { + // Update Chain. + Chain = Call.second; + } else { + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT); + SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode); + auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain); + CCCode = getCmpLibcallCC(LC2); + if (ShouldInvertCC) + CCCode = getSetCCInverse(CCCode, RetVT); + NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode); + if (Chain) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second, + Call2.second); + NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl, + Tmp.getValueType(), Tmp, NewLHS); NewRHS = SDValue(); } } @@ -693,6 +719,27 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return Op.getOperand(1); break; } + case ISD::SETCC: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + // If (1) we only need the sign-bit, (2) the setcc operands are the same + // width as the setcc result, and (3) the result of a setcc conforms to 0 or + // -1, we may be able to bypass the setcc. + if (DemandedBits.isSignMask() && + Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() && + getBooleanContents(Op0.getValueType()) == + BooleanContent::ZeroOrNegativeOneBooleanContent) { + // If we're testing X < 0, then this compare isn't needed - just use X! + // FIXME: We're limiting to integer types here, but this should also work + // if we don't care about FP signed-zero. The use of SETLT with FP means + // that we don't care about NaNs. + if (CC == ISD::SETLT && Op1.getValueType().isInteger() && + (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode()))) + return Op0; + } + break; + } case ISD::SIGN_EXTEND_INREG: { // If none of the extended bits are demanded, eliminate the sextinreg. EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1251,7 +1298,7 @@ bool TargetLowering::SimplifyDemandedBits( // -1, we may be able to bypass the setcc. if (DemandedBits.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && - getBooleanContents(VT) == + getBooleanContents(Op0.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! 
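A standalone sanity check of the sign-bit shortcut used in SimplifyMultipleUseDemandedBits (and in SimplifyDemandedBits below): when booleans are represented as 0 / -1 and only the sign bit of `setcc(x, 0, setlt)` is demanded, that bit already equals the sign bit of x, so the compare can be dropped. A minimal sketch in plain C++, with int32_t standing in for the DAG value type:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X = -100000; X <= 100000; ++X) {
    int32_t SetCC = (X < 0) ? -1 : 0;             // ZeroOrNegativeOne boolean
    uint32_t SignOfSetCC = (uint32_t)SetCC >> 31; // the only demanded bit
    uint32_t SignOfX = (uint32_t)X >> 31;
    assert(SignOfSetCC == SignOfX);               // so the SETCC can become X
  }
  return 0;
}
```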
// FIXME: We're limiting to integer types here, but this should also work @@ -1538,6 +1585,16 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero = Known2.Zero.reverseBits(); break; } + case ISD::BSWAP: { + SDValue Src = Op.getOperand(0); + APInt DemandedSrcBits = DemandedBits.byteSwap(); + if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + Known.One = Known2.One.byteSwap(); + Known.Zero = Known2.Zero.byteSwap(); + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1753,15 +1810,11 @@ bool TargetLowering::SimplifyDemandedBits( // undesirable. break; - auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1)); - if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth)) + SDValue ShAmt = Src.getOperand(1); + auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt); + if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) break; - - SDValue Shift = Src.getOperand(1); - uint64_t ShVal = ShAmt->getZExtValue(); - - if (TLO.LegalTypes()) - Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); + uint64_t ShVal = ShAmtC->getZExtValue(); APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); @@ -1771,10 +1824,12 @@ bool TargetLowering::SimplifyDemandedBits( if (!(HighBits & DemandedBits)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. + if (TLO.LegalTypes()) + ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); return TLO.CombineTo( - Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt)); } break; } @@ -1818,6 +1873,17 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1)) return true; + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedSrcBits.isAllOnesValue() || + !DemandedSrcElts.isAllOnesValue()) { + if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) { + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx); + return TLO.CombineTo(Op, NewOp); + } + } + Known = Known2; if (BitWidth > EltBitWidth) Known = Known.zext(BitWidth, false /* => any extend */); @@ -2808,7 +2874,8 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, // Note that where Y is variable and is known to have at most one bit set // (for example, if it is Z & 1) we cannot do this; the expressions are not // equivalent when Y == 0. - Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + assert(OpVT.isInteger()); + Cond = ISD::getSetCCInverse(Cond, OpVT); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(Cond, N0.getSimpleValueType())) return DAG.getSetCC(DL, VT, N0, Zero, Cond); @@ -2897,7 +2964,8 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( // What if we invert constants? (and the target predicate) I1.negate(); I01.negate(); - NewCond = getSetCCInverse(NewCond, /*isInteger=*/true); + assert(XVT.isInteger()); + NewCond = getSetCCInverse(NewCond, XVT); if (!checkConstants()) return SDValue(); // Great, e.g. 
got icmp uge i16 (add i16 %x, -128), -256 @@ -3052,6 +3120,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAGCombinerInfo &DCI, const SDLoc &dl) const { SelectionDAG &DAG = DCI.DAG; + const DataLayout &Layout = DAG.getDataLayout(); EVT OpVT = N0.getValueType(); // Constant fold or commute setcc. @@ -3132,7 +3201,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) SDValue Zero = DAG.getConstant(0, dl, CTVT); SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); - ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true); + assert(CTVT.isInteger()); + ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); @@ -3223,7 +3293,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode InvCond = ISD::getSetCCInverse( cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(), - TopSetCC.getOperand(0).getValueType().isInteger()); + TopSetCC.getOperand(0).getValueType()); return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0), TopSetCC.getOperand(1), InvCond); @@ -3256,7 +3326,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offset<origWidth/width; offset++) { if (Mask.isSubsetOf(newMask)) { - if (DAG.getDataLayout().isLittleEndian()) + if (Layout.isLittleEndian()) bestOffset = (uint64_t)offset * (width/8); else bestOffset = (origWidth/width - offset - 1) * (width/8); @@ -3272,11 +3342,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); if (newVT.isRound() && shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { - EVT PtrType = Lod->getOperand(1).getValueType(); SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) - Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), - DAG.getConstant(bestOffset, dl, PtrType)); + Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl); unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad( newVT, dl, Lod->getChain(), Ptr, @@ -3332,8 +3400,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && isCondCodeLegal(Cond, newVT.getSimpleVT()))) { - EVT NewSetCCVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); + EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT); SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), @@ -3379,14 +3446,16 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && - isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { + isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) && + (N0.getValueType() == MVT::i1 || + getBooleanContents(N0.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent)) { bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne()); if (TrueWhenTrue) return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); // Invert the condition. 
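The ctpop rewrite above relies on the usual power-of-two identities; a quick brute-force check of both directions (using std::popcount from C++20 as the reference):

```cpp
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X <= 1000000; ++X) {
    bool IsPow2 = std::popcount(X) == 1;
    // (ctpop x) == 1  -->  (x != 0) && ((x & (x - 1)) == 0)
    assert(IsPow2 == ((X != 0) && ((X & (X - 1)) == 0)));
    // (ctpop x) != 1  -->  (x == 0) || ((x & (x - 1)) != 0)
    assert(!IsPow2 == ((X == 0) || ((X & (X - 1)) != 0)));
  }
  return 0;
}
```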
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - CC = ISD::getSetCCInverse(CC, - N0.getOperand(0).getValueType().isInteger()); + CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType()); if (DCI.isBeforeLegalizeOps() || isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); @@ -3420,10 +3489,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, Val, N1, Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } - } else if (N1C->isOne() && - (VT == MVT::i1 || - getBooleanContents(N0->getValueType(0)) == - ZeroOrOneBooleanContent)) { + } else if (N1C->isOne()) { SDValue Op0 = N0; if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); @@ -3431,10 +3497,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((Op0.getOpcode() == ISD::XOR) && Op0.getOperand(0).getOpcode() == ISD::SETCC && Op0.getOperand(1).getOpcode() == ISD::SETCC) { - // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) - Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; - return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), - Cond); + SDValue XorLHS = Op0.getOperand(0); + SDValue XorRHS = Op0.getOperand(1); + // Ensure that the input setccs return an i1 type or 0/1 value. + if (Op0.getValueType() == MVT::i1 || + (getBooleanContents(XorLHS.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent && + getBooleanContents(XorRHS.getOperand(0).getValueType()) == + ZeroOrOneBooleanContent)) { + // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) + Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; + return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond); + } } if (Op0.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op0.getOperand(1)) && @@ -3611,14 +3685,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { - auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); + EVT ShiftTy = + getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. unsigned ShCt = AndRHS->getAPIntValue().logBase2(); if (AndRHS->getAPIntValue().isPowerOf2() && - ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { + !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShCt, dl, ShiftTy))); @@ -3628,7 +3702,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Perform the xform if C1 is a single bit. 
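Both shift transforms that are now guarded by shouldAvoidTransformToShift encode simple bit identities; a small brute-force check over unsigned 32-bit values (logical shift, matching ISD::SRL):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X <= 1000000; ++X) {
    // (X & 8) != 0  -->  (X & 8) >> 3
    assert(((X & 8u) != 0) == (((X & 8u) >> 3) == 1u));
    // (X & -256) == 256  -->  (X >> 8) == 1
    assert(((X & ~255u) == 256u) == ((X >> 8) == 1u));
  }
  return 0;
}
```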
unsigned ShCt = C1.logBase2(); if (C1.isPowerOf2() && - ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { + !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShCt, dl, ShiftTy))); @@ -3639,6 +3713,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (C1.getMinSignedBits() <= 64 && !isLegalICmpImmediate(C1.getSExtValue())) { + EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize()); // (X & -256) == 256 -> (X >> 8) == 1 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && N0.getOpcode() == ISD::AND && N0.hasOneUse()) { @@ -3646,15 +3721,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); - auto &DL = DAG.getDataLayout(); - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); - EVT CmpTy = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), - DAG.getConstant(ShiftBits, dl, - ShiftTy)); - SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy); - return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + SDValue Shift = + DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0), + DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } } } } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || @@ -3676,14 +3749,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } NewC.lshrInPlace(ShiftBits); if (ShiftBits && NewC.getMinSignedBits() <= 64 && - isLegalICmpImmediate(NewC.getSExtValue())) { - auto &DL = DAG.getDataLayout(); - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); - EVT CmpTy = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, + isLegalICmpImmediate(NewC.getSExtValue()) && + !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { + SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0, DAG.getConstant(ShiftBits, dl, ShiftTy)); - SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy); + SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy); return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); } } @@ -4480,6 +4550,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); + // Indirect 'other' or 'immediate' constraints are not allowed. + if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory || + CType == TargetLowering::C_Register || + CType == TargetLowering::C_RegisterClass)) + continue; + // If this is an 'other' or 'immediate' constraint, see if the operand is // valid for it. For example, on X86 we might have an 'rI' constraint. 
If // the operand is an integer in the range [0..31] we want to use I (saving a @@ -4905,7 +4981,7 @@ SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const { - SmallVector<SDNode *, 2> Built; + SmallVector<SDNode *, 5> Built; if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, DCI, DL, Built)) { for (SDNode *N : Built) @@ -4940,26 +5016,44 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, if (!isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); - // TODO: Could support comparing with non-zero too. - ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!CompTarget || !CompTarget->isNullValue()) - return SDValue(); - - bool HadOneDivisor = false; - bool AllDivisorsAreOnes = true; + bool ComparingWithAllZeros = true; + bool AllComparisonsWithNonZerosAreTautological = true; + bool HadTautologicalLanes = false; + bool AllLanesAreTautological = true; bool HadEvenDivisor = false; bool AllDivisorsArePowerOfTwo = true; - SmallVector<SDValue, 16> PAmts, KAmts, QAmts; + bool HadTautologicalInvertedLanes = false; + SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts; - auto BuildUREMPattern = [&](ConstantSDNode *C) { + auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (C->isNullValue()) + if (CDiv->isNullValue()) return false; - const APInt &D = C->getAPIntValue(); - // If all divisors are ones, we will prefer to avoid the fold. - HadOneDivisor |= D.isOneValue(); - AllDivisorsAreOnes &= D.isOneValue(); + const APInt &D = CDiv->getAPIntValue(); + const APInt &Cmp = CCmp->getAPIntValue(); + + ComparingWithAllZeros &= Cmp.isNullValue(); + + // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, + // if C2 is not less than C1, the comparison is always false. + // But we will only be able to produce the comparison that will give the + // opposive tautological answer. So this lane would need to be fixed up. + bool TautologicalInvertedLane = D.ule(Cmp); + HadTautologicalInvertedLanes |= TautologicalInvertedLane; + + // If all lanes are tautological (either all divisors are ones, or divisor + // is not greater than the constant we are comparing with), + // we will prefer to avoid the fold. + bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane; + HadTautologicalLanes |= TautologicalLane; + AllLanesAreTautological &= TautologicalLane; + + // If we are comparing with non-zero, we need'll need to subtract said + // comparison value from the LHS. But there is no point in doing that if + // every lane where we are comparing with non-zero is tautological.. + if (!Cmp.isNullValue()) + AllComparisonsWithNonZerosAreTautological &= TautologicalLane; // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); @@ -4981,19 +5075,27 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); - // Q = floor((2^W - 1) / D) - APInt Q = APInt::getAllOnesValue(W).udiv(D); + // Q = floor((2^W - 1) u/ D) + // R = ((2^W - 1) u% D) + APInt Q, R; + APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R); + + // If we are comparing with zero, then that comparison constant is okay, + // else it may need to be one less than that. 
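The BuildUREMPattern logic above is the usual multiplicative-inverse rewrite of `x u% D == 0`: split D into D0 * 2^K, multiply by the inverse of D0 modulo 2^W, rotate right by K, and compare against Q = floor((2^W - 1) / D). A brute-force verification of that identity for W = 8 in plain C++ (no LLVM types; the helper names are made up for the sketch):

```cpp
#include <cassert>
#include <cstdint>

// Inverse of an odd value modulo 2^8 via Newton's iteration (each step
// doubles the number of correct low bits, starting from 3).
static uint8_t inverseMod256(uint8_t D0) {
  uint8_t X = D0;
  for (int I = 0; I < 3; ++I)
    X *= (uint8_t)(2 - D0 * X);
  return X;
}

static uint8_t rotr8(uint8_t V, unsigned K) {
  return K ? (uint8_t)((V >> K) | (V << (8 - K))) : V;
}

int main() {
  for (unsigned D = 2; D < 256; ++D) {
    unsigned K = 0;                        // D = D0 * 2^K
    while (((D >> K) & 1) == 0)
      ++K;
    uint8_t D0 = (uint8_t)(D >> K);
    uint8_t P = inverseMod256(D0);         // D0 * P == 1 (mod 2^8)
    uint8_t Q = (uint8_t)(255u / D);       // floor((2^W - 1) / D)
    for (unsigned X = 0; X < 256; ++X) {
      bool RemIsZero = (X % D) == 0;
      bool Folded = rotr8((uint8_t)(X * P), K) <= Q;
      assert(RemIsZero == Folded);         // (setule (rotr (mul N, P), K), Q)
    }
  }
  return 0;
}
```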
+ if (Cmp.ugt(R)) + Q -= 1; assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); - // If the divisor is 1 the result can be constant-folded. - if (D.isOneValue()) { + // If the lane is tautological the result can be constant-folded. + if (TautologicalLane) { // Set P and K amount to a bogus values so we can try to splat them. P = 0; K = -1; - assert(Q.isAllOnesValue() && - "Expecting all-ones comparison for one divisor"); + // And ensure that comparison constant is tautological, + // it will always compare true/false. + Q = -1; } PAmts.push_back(DAG.getConstant(P, DL, SVT)); @@ -5007,11 +5109,11 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue D = REMNode.getOperand(1); // Collect the values from each element. - if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) + if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern)) return SDValue(); - // If this is a urem by a one, avoid the fold since it can be constant-folded. - if (AllDivisorsAreOnes) + // If all lanes are tautological, the result can be constant-folded. + if (AllLanesAreTautological) return SDValue(); // If this is a urem by a powers-of-two, avoid the fold since it can be @@ -5021,7 +5123,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue PVal, KVal, QVal; if (VT.isVector()) { - if (HadOneDivisor) { + if (HadTautologicalLanes) { // Try to turn PAmts into a splat, since we don't care about the values // that are currently '0'. If we can't, just keep '0'`s. turnVectorIntoSplatVector(PAmts, isNullConstant); @@ -5040,6 +5142,14 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, QVal = QAmts[0]; } + if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) { + if (!isOperationLegalOrCustom(ISD::SUB, VT)) + return SDValue(); // FIXME: Could/should use `ISD::ADD`? + assert(CompTargetNode.getValueType() == N.getValueType() && + "Expecting that the types on LHS and RHS of comparisons match."); + N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode); + } + // (mul N, P) SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); Created.push_back(Op0.getNode()); @@ -5058,8 +5168,41 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, } // UREM: (setule/setugt (rotr (mul N, P), K), Q) - return DAG.getSetCC(DL, SETCCVT, Op0, QVal, - ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + SDValue NewCC = + DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + if (!HadTautologicalInvertedLanes) + return NewCC; + + // If any lanes previously compared always-false, the NewCC will give + // always-true result for them, so we need to fixup those lanes. + // Or the other way around for inequality predicate. + assert(VT.isVector() && "Can/should only get here for vectors."); + Created.push_back(NewCC.getNode()); + + // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, + // if C2 is not less than C1, the comparison is always false. + // But we have produced the comparison that will give the + // opposive tautological answer. So these lanes would need to be fixed up. + SDValue TautologicalInvertedChannels = + DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE); + Created.push_back(TautologicalInvertedChannels.getNode()); + + if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) { + // If we have a vector select, let's replace the comparison results in the + // affected lanes with the correct tautological result. 
+ SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true, + DL, SETCCVT, SETCCVT); + return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels, + Replacement, NewCC); + } + + // Else, we can just invert the comparison result in the appropriate lanes. + if (isOperationLegalOrCustom(ISD::XOR, SETCCVT)) + return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC, + TautologicalInvertedChannels); + + return SDValue(); // Don't know how to lower. } /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE @@ -5544,7 +5687,14 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, ForCodeSize, Depth + 1); char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, ForCodeSize, Depth + 1); - if (V0 >= V1) { + // TODO: This is a hack. It is possible that costs have changed between now + // and the initial calls to isNegatibleForFree(). That is because we + // are rewriting the expression, and that may change the number of + // uses (and therefore the cost) of values. If the negation costs are + // equal, only negate this value if it is a constant. Otherwise, try + // operand 1. A better fix would eliminate uses as a cost factor or + // track the change in uses as we rewrite the expression. + if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) { // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) SDValue Neg0 = getNegatedExpression( Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); @@ -5954,6 +6104,8 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, EVT DstVT = Node->getValueType(0); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); + EVT DstSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); // Only expand vector types if we have the appropriate vector bit operations. unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT : @@ -5980,7 +6132,15 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, } SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); - SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + SDValue Sel; + + if (Node->isStrictFPOpcode()) { + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, + Node->getOperand(0), /*IsSignaling*/ true); + Chain = Sel.getValue(1); + } else { + Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); + } bool Strict = Node->isStrictFPOpcode() || shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); @@ -5989,28 +6149,29 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). // Sel = Src < 0x8000000000000000 - // Val = select Sel, Src, Src - 0x8000000000000000 - // Ofs = select Sel, 0, 0x8000000000000000 - // Result = fp_to_sint(Val) ^ Ofs + // FltOfs = select Sel, 0, 0x8000000000000000 + // IntOfs = select Sel, 0, 0x8000000000000000 + // Result = fp_to_sint(Src - FltOfs) ^ IntOfs // TODO: Should any fast-math-flags be set for the FSUB? 
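The rewritten expandFP_TO_UINT path above derives an unsigned conversion from a single signed fp_to_sint: subtract 2^(N-1) in the floating-point domain only when the input is too large, then XOR the sign mask back into the integer result. A scalar model of the same dataflow for f64 -> i64 (a sketch; it deliberately only feeds in values that are in range for the conversion):

```cpp
#include <cassert>
#include <cstdint>

static uint64_t fptoui_via_sint(double Src) {
  const double Cst = 9223372036854775808.0;           // 2^63 as a double
  bool Sel = Src < Cst;                               // Src < 0x8000000000000000
  double FltOfs = Sel ? 0.0 : Cst;                    // FltOfs = select Sel, 0, 2^63
  uint64_t IntOfs = Sel ? 0 : 0x8000000000000000ULL;  // IntOfs = select Sel, 0, signmask
  int64_t SInt = (int64_t)(Src - FltOfs);             // fp_to_sint(Src - FltOfs)
  return (uint64_t)SInt ^ IntOfs;                     // Result = fp_to_sint(...) ^ IntOfs
}

int main() {
  const double Tests[] = {0.0, 1.5, 4294967296.0, 9007199254740992.0,
                          9223372036854775808.0, 18446744073709549568.0};
  for (double D : Tests)
    assert(fptoui_via_sint(D) == (uint64_t)D);
  return 0;
}
```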
- SDValue SrcBiased; - if (Node->isStrictFPOpcode()) - SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, - { Node->getOperand(0), Src, Cst }); - else - SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); - SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), - DAG.getConstant(SignMask, dl, DstVT)); + SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel, + DAG.getConstantFP(0.0, dl, SrcVT), Cst); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); + SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel, + DAG.getConstant(0, dl, DstVT), + DAG.getConstant(SignMask, dl, DstVT)); SDValue SInt; if (Node->isStrictFPOpcode()) { + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Chain, Src, FltOfs }); SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, - { SrcBiased.getValue(1), Val }); + { Val.getValue(1), Val }); Chain = SInt.getValue(1); - } else + } else { + SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs); SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); - Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); + } + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) @@ -6023,14 +6184,17 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); False = DAG.getNode(ISD::XOR, dl, DstVT, False, DAG.getConstant(SignMask, dl, DstVT)); + Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); Result = DAG.getSelect(dl, DstVT, Sel, True, False); } return true; } bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, + SDValue &Chain, SelectionDAG &DAG) const { - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -6052,17 +6216,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, return false; // For unsigned conversions, convert them to signed conversions using the - // algorithm from the x86_64 __floatundidf in compiler_rt. - SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); - - SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); - SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); - SDValue AndConst = DAG.getConstant(1, dl, SrcVT); - SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); - SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); - - SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); - SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); + // algorithm from the x86_64 __floatundisf in compiler_rt. // TODO: This really should be implemented using a branch rather than a // select. 
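For reference, the select-based expandUINT_TO_FP expansion that follows implements the same halve-and-round trick as compiler-rt's __floatundisf/__floatundidf: when the sign bit of the i64 input is set, shift right by one with the dropped low bit folded back in as a sticky bit, convert that as a signed value, and double the result. A scalar model for i64 -> f64 (a sketch, assuming the host rounds to nearest-even, which is what makes the sticky bit sufficient):

```cpp
#include <cassert>
#include <cstdint>

static double uitofp_via_sitofp(uint64_t Src) {
  if ((int64_t)Src >= 0)                  // sign bit clear: plain sint_to_fp
    return (double)(int64_t)Src;
  // Or = (Src >> 1) | (Src & 1): halve, keeping the dropped bit sticky.
  uint64_t Or = (Src >> 1) | (Src & 1);
  double SignCvt = (double)(int64_t)Or;   // now within signed range
  return SignCvt + SignCvt;               // Slow = SignCvt + SignCvt
}

int main() {
  const uint64_t Tests[] = {0, 1, 0x7fffffffffffffffULL,
                            0x8000000000000000ULL, 0x8000000000000001ULL,
                            0xfffffffffffffffeULL, 0xffffffffffffffffULL};
  for (uint64_t U : Tests)
    assert(uitofp_via_sitofp(U) == (double)U);
  return 0;
}
```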
We happen to get lucky and machinesink does the right @@ -6073,6 +6227,37 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue SignBitTest = DAG.getSetCC( dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); + + SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); + SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); + SDValue AndConst = DAG.getConstant(1, dl, SrcVT); + SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); + + SDValue Slow, Fast; + if (Node->isStrictFPOpcode()) { + // In strict mode, we must avoid spurious exceptions, and therefore + // must make sure to only emit a single STRICT_SINT_TO_FP. + SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src); + Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), InCvt }); + Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other }, + { Fast.getValue(1), Fast, Fast }); + Chain = Slow.getValue(1); + // The STRICT_SINT_TO_FP inherits the exception mode from the + // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can + // never raise any exception. + SDNodeFlags Flags; + Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept()); + Fast->setFlags(Flags); + Flags.setNoFPExcept(true); + Slow->setFlags(Flags); + } else { + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); + Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); + Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); + } + Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast); return true; } @@ -6105,8 +6290,18 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); - SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); - Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + if (Node->isStrictFPOpcode()) { + SDValue HiSub = + DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other}, + {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52}); + Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other}, + {HiSub.getValue(1), LoFlt, HiSub}); + Chain = Result.getValue(1); + } else { + SDValue HiSub = + DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); + } return true; } @@ -6150,6 +6345,26 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, } } + // If none of the above worked, but there are no NaNs, then expand to + // a compare/select sequence. This is required for correctness since + // InstCombine might have canonicalized a fcmp+select sequence to a + // FMINNUM/FMAXNUM node. If we were to fall through to the default + // expansion to libcall, we might introduce a link-time dependency + // on libm into a file that originally did not have one. + if (Node->getFlags().hasNoNaNs()) { + ISD::CondCode Pred = + Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; + SDValue Op1 = Node->getOperand(0); + SDValue Op2 = Node->getOperand(1); + SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred); + // Copy FMF flags, but always set the no-signed-zeros flag + // as this is implied by the FMINNUM/FMAXNUM semantics. 
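The new no-NaNs fallback above lowers FMINNUM/FMAXNUM to a plain compare-and-select, i.e. exactly the fcmp+select pattern InstCombine may have folded into the node in the first place; with NaNs excluded the two agree (signed zeros may differ, which is why the expansion sets the no-signed-zeros flag). A small check against std::fmin (hypothetical helper name; no NaNs or signed zeros among the inputs):

```cpp
#include <cassert>
#include <cmath>

static double fminnum_nonan(double A, double B) {
  return A < B ? A : B;   // ISD::SETLT + select, as in the expansion
}

int main() {
  const double Vals[] = {-1.0e300, -3.5, -1.0, 0.0, 0.5, 2.0, 1.0e300};
  for (double A : Vals)
    for (double B : Vals)
      assert(fminnum_nonan(A, B) == std::fmin(A, B));
  return 0;
}
```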
+ SDNodeFlags Flags = Node->getFlags(); + Flags.setNoSignedZeros(true); + SelCC->setFlags(Flags); + return SelCC; + } + return SDValue(); } @@ -6342,8 +6557,9 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, return true; } -SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, - SelectionDAG &DAG) const { +std::pair<SDValue, SDValue> +TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, + SelectionDAG &DAG) const { SDLoc SL(LD); SDValue Chain = LD->getChain(); SDValue BasePTR = LD->getBasePtr(); @@ -6377,7 +6593,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals); - return DAG.getMergeValues({Value, NewChain}, SL); + return std::make_pair(Value, NewChain); } SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, @@ -6471,10 +6687,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { if (!isOperationLegalOrCustom(ISD::LOAD, intVT) && LoadedVT.isVector()) { // Scalarize the load and let the individual components be handled. - SDValue Scalarized = scalarizeVectorLoad(LD, DAG); - if (Scalarized->getOpcode() == ISD::MERGE_VALUES) - return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); - return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); + return scalarizeVectorLoad(LD, DAG); } // Expand to a (misaligned) integer load of the same size, @@ -6807,7 +7020,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, DAG.getConstant(EltSize, dl, IdxVT)); - return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index); + return DAG.getMemBasePlusOffset(VecPtr, Index, dl); } //===----------------------------------------------------------------------===// @@ -7096,6 +7309,86 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { return Result; } +SDValue +TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, + SDValue LHS, SDValue RHS, + unsigned Scale, SelectionDAG &DAG) const { + assert((Opcode == ISD::SDIVFIX || + Opcode == ISD::UDIVFIX) && + "Expected a fixed point division opcode"); + + EVT VT = LHS.getValueType(); + bool Signed = Opcode == ISD::SDIVFIX; + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + + // If there is enough room in the type to upscale the LHS or downscale the + // RHS before the division, we can perform it in this type without having to + // resize. For signed operations, the LHS headroom is the number of + // redundant sign bits, and for unsigned ones it is the number of zeroes. + // The headroom for the RHS is the number of trailing zeroes. + unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1 + : DAG.computeKnownBits(LHS).countMinLeadingZeros(); + unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros(); + + if (LHSLead + RHSTrail < Scale) + return SDValue(); + + unsigned LHSShift = std::min(LHSLead, Scale); + unsigned RHSShift = Scale - LHSShift; + + // At this point, we know that if we shift the LHS up by LHSShift and the + // RHS down by RHSShift, we can emit a regular division with a final scaling + // factor of Scale. + + EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout()); + if (LHSShift) + LHS = DAG.getNode(ISD::SHL, dl, VT, LHS, + DAG.getConstant(LHSShift, dl, ShiftTy)); + if (RHSShift) + RHS = DAG.getNode(Signed ? 
ISD::SRA : ISD::SRL, dl, VT, RHS, + DAG.getConstant(RHSShift, dl, ShiftTy)); + + SDValue Quot; + if (Signed) { + // For signed operations, if the resulting quotient is negative and the + // remainder is nonzero, subtract 1 from the quotient to round towards + // negative infinity. + SDValue Rem; + // FIXME: Ideally we would always produce an SDIVREM here, but if the + // type isn't legal, SDIVREM cannot be expanded. There is no reason why + // we couldn't just form a libcall, but the type legalizer doesn't do it. + if (isTypeLegal(VT) && + isOperationLegalOrCustom(ISD::SDIVREM, VT)) { + Quot = DAG.getNode(ISD::SDIVREM, dl, + DAG.getVTList(VT, VT), + LHS, RHS); + Rem = Quot.getValue(1); + Quot = Quot.getValue(0); + } else { + Quot = DAG.getNode(ISD::SDIV, dl, VT, + LHS, RHS); + Rem = DAG.getNode(ISD::SREM, dl, VT, + LHS, RHS); + } + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE); + SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT); + SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT); + SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg); + SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot, + DAG.getConstant(1, dl, VT)); + Quot = DAG.getSelect(dl, VT, + DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg), + Sub1, Quot); + } else + Quot = DAG.getNode(ISD::UDIV, dl, VT, + LHS, RHS); + + // TODO: Saturation. + + return Quot; +} + void TargetLowering::expandUADDSUBO( SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const { SDLoc dl(Node); diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index 17a4d76c4c80..45427dc41e6e 100644 --- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index 412a00095b9b..85dd4f59fa13 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -73,6 +73,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp index db520d4e6403..4abf9ea41b65 100644 --- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -24,9 +23,11 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "sjljehprepare" @@ -175,9 +176,9 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. 
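Back in expandFixedPointDiv above, the signed path has to produce a floor (round-toward-negative-infinity) quotient, which is why it subtracts one whenever the remainder is non-zero and the operand signs differ. The sketch below models the intended semantics of a Q16.16 SDIVFIX using a 64-bit intermediate instead of the in-type headroom shifts, and ignores saturation just as the TODO does (the fixed-point format and helper name are illustrative only):

```cpp
#include <cassert>
#include <cstdint>

static int32_t sdivfix_q16(int32_t LHS, int32_t RHS) {
  int64_t Num = (int64_t)LHS << 16;   // pre-scale the dividend by 2^Scale
  int64_t Quot = Num / RHS;           // C++ '/' truncates toward zero
  int64_t Rem = Num % RHS;
  // Negative exact result with a non-zero remainder: truncation rounded the
  // wrong way, so step down to the floor, as the expansion does.
  if (Rem != 0 && ((Num < 0) != (RHS < 0)))
    Quot -= 1;
  return (int32_t)Quot;
}

int main() {
  assert(sdivfix_q16(1 << 16, 2 << 16) == (1 << 15));      //  1.0 / 2.0 == 0.5
  assert(sdivfix_q16(-(1 << 16), 2 << 16) == -(1 << 15));  // -1.0 / 2.0 == -0.5
  assert(sdivfix_q16(1 << 16, 3 << 16) == 21845);          // floor( 65536 / 3)
  assert(sdivfix_q16(-(1 << 16), 3 << 16) == -21846);      // floor(-65536 / 3)
  return 0;
}
```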
auto &DL = F.getParent()->getDataLayout(); - unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy); - FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), - nullptr, Align, "fn_context", &EntryBB->front()); + const Align Alignment(DL.getPrefTypeAlignment(FunctionContextTy)); + FuncCtx = new AllocaInst(FunctionContextTy, DL.getAllocaAddrSpace(), nullptr, + Alignment, "fn_context", &EntryBB->front()); // Fill in the function context structure. for (LandingPadInst *LPI : LPads) { diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp index 9fff873324d0..6664b58eccf8 100644 --- a/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/llvm/lib/CodeGen/SlotIndexes.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Config/llvm-config.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -18,6 +19,16 @@ using namespace llvm; #define DEBUG_TYPE "slotindexes" char SlotIndexes::ID = 0; + +SlotIndexes::SlotIndexes() : MachineFunctionPass(ID), mf(nullptr) { + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); +} + +SlotIndexes::~SlotIndexes() { + // The indexList's nodes are all allocated in the BumpPtrAllocator. + indexList.clearAndLeakNodesUnsafely(); +} + INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE, "Slot index numbering", false, false) diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp index 11452fdb747a..36a0ddf67b19 100644 --- a/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/llvm/lib/CodeGen/SpillPlacement.cpp @@ -37,6 +37,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include <algorithm> diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp index 641b54205d62..b6e81116286f 100644 --- a/llvm/lib/CodeGen/StackColoring.cpp +++ b/llvm/lib/CodeGen/StackColoring.cpp @@ -48,6 +48,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -1003,7 +1004,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // zone are okay, despite the fact that we don't have a good way // for validating all of the usages of the calculation. #ifndef NDEBUG - bool TouchesMemory = I.mayLoad() || I.mayStore(); + bool TouchesMemory = I.mayLoadOrStore(); // If we *don't* protect the user from escaped allocas, don't bother // validating the instructions. 
if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) { diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp index fb2abf3daa7f..5ccfacfc26dc 100644 --- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp index 383c91259ffc..e16587c44a55 100644 --- a/llvm/lib/CodeGen/StackMaps.cpp +++ b/llvm/lib/CodeGen/StackMaps.cpp @@ -260,7 +260,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { // Create a LiveOutReg for each bit that is set in the register mask. for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) - if ((Mask[Reg / 32] >> Reg % 32) & 1) + if ((Mask[Reg / 32] >> (Reg % 32)) & 1) LiveOuts.push_back(createLiveOutReg(Reg, TRI)); // We don't need to keep track of a register if its super-register is already @@ -294,14 +294,13 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { return LiveOuts; } -void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, +void StackMaps::recordStackMapOpers(const MCSymbol &MILabel, + const MachineInstr &MI, uint64_t ID, MachineInstr::const_mop_iterator MOI, MachineInstr::const_mop_iterator MOE, bool recordResult) { MCContext &OutContext = AP.OutStreamer->getContext(); - MCSymbol *MILabel = OutContext.createTempSymbol(); - AP.OutStreamer->EmitLabel(MILabel); - + LocationVec Locations; LiveOutVec LiveOuts; @@ -340,7 +339,7 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, // Create an expression to calculate the offset of the callsite from function // entry. 
const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub( - MCSymbolRefExpr::create(MILabel, OutContext), + MCSymbolRefExpr::create(&MILabel, OutContext), MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext); CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), @@ -360,22 +359,23 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, FnInfos.insert(std::make_pair(AP.CurrentFnSym, FunctionInfo(FrameSize))); } -void StackMaps::recordStackMap(const MachineInstr &MI) { +void StackMaps::recordStackMap(const MCSymbol &L, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap"); StackMapOpers opers(&MI); const int64_t ID = MI.getOperand(PatchPointOpers::IDPos).getImm(); - recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), opers.getVarIdx()), + recordStackMapOpers(L, MI, ID, std::next(MI.operands_begin(), + opers.getVarIdx()), MI.operands_end()); } -void StackMaps::recordPatchPoint(const MachineInstr &MI) { +void StackMaps::recordPatchPoint(const MCSymbol &L, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint"); PatchPointOpers opers(&MI); const int64_t ID = opers.getID(); auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx()); - recordStackMapOpers(MI, ID, MOI, MI.operands_end(), + recordStackMapOpers(L, MI, ID, MOI, MI.operands_end(), opers.isAnyReg() && opers.hasDef()); #ifndef NDEBUG @@ -390,14 +390,14 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { #endif } -void StackMaps::recordStatepoint(const MachineInstr &MI) { +void StackMaps::recordStatepoint(const MCSymbol &L, const MachineInstr &MI) { assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint"); StatepointOpers opers(&MI); // Record all the deopt and gc operands (they're contiguous and run from the // initial index to the end of the operand list) const unsigned StartIdx = opers.getVarIdx(); - recordStackMapOpers(MI, opers.getID(), MI.operands_begin() + StartIdx, + recordStackMapOpers(L, MI, opers.getID(), MI.operands_begin() + StartIdx, MI.operands_end(), false); } diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp index 5683d1db473c..4e2189884bb1 100644 --- a/llvm/lib/CodeGen/StackProtector.cpp +++ b/llvm/lib/CodeGen/StackProtector.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -61,6 +62,10 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp", char StackProtector::ID = 0; +StackProtector::StackProtector() : FunctionPass(ID), SSPBufferSize(8) { + initializeStackProtectorPass(*PassRegistry::getPassRegistry()); +} + INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE, "Insert stack protectors", false, true) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index 9c8143c55dc2..7ae758323280 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp 
b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp index 83acf7f80715..c2cd8fa0324e 100644 --- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp +++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp @@ -42,7 +42,9 @@ SwitchCG::getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases, void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, - MachineBasicBlock *DefaultMBB) { + MachineBasicBlock *DefaultMBB, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { #ifndef NDEBUG // Clusters must be non-empty, sorted, and only contain Range clusters. assert(!Clusters.empty()); @@ -80,7 +82,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, assert(Range >= NumCases); // Cheap case: the whole range may be suitable for jump table. - if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) { + if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) { CaseCluster JTCluster; if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { Clusters[0] = JTCluster; @@ -138,7 +140,7 @@ void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters, assert(NumCases < UINT64_MAX / 100); assert(Range >= NumCases); - if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) { + if (TLI->isSuitableForJumpTable(SI, NumCases, Range, PSI, BFI)) { unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1]; int64_t NumEntries = j - i + 1; diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp index ba348b4a9d41..648bf48b7d17 100644 --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -12,12 +12,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" using namespace llvm; @@ -37,6 +40,8 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<LazyMachineBlockFrequencyInfoPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -55,6 +60,11 @@ public: EarlyTailDuplicate() : TailDuplicateBase(ID, true) { initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry()); } + + MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::NoPHIs); + } }; } // end anonymous namespace @@ -74,7 +84,11 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { return false; auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - Duplicator.initMF(MF, PreRegAlloc, MBPI, /*LayoutMode=*/false); + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto *MBFI = (PSI && PSI->hasProfileSummary()) ? 
+ &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() : + nullptr; + Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false); bool MadeChange = false; while (Duplicator.tailDuplicateBlocks()) diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 03c68a37e459..cd1278fd4d8d 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -19,13 +19,16 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -77,6 +80,8 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, const MachineBranchProbabilityInfo *MBPIin, + const MachineBlockFrequencyInfo *MBFIin, + ProfileSummaryInfo *PSIin, bool LayoutModeIn, unsigned TailDupSizeIn) { MF = &MFin; TII = MF->getSubtarget().getInstrInfo(); @@ -84,6 +89,8 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, MRI = &MF->getRegInfo(); MMI = &MF->getMMI(); MBPI = MBPIin; + MBFI = MBFIin; + PSI = PSIin; TailDupSize = TailDupSizeIn; assert(MBPI != nullptr && "Machine Branch Probability Info required"); @@ -555,14 +562,14 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, // duplicate only one, because one branch instruction can be eliminated to // compensate for the duplication. unsigned MaxDuplicateCount; - if (TailDupSize == 0 && - TailDuplicateSize.getNumOccurrences() == 0 && - MF->getFunction().hasOptSize()) - MaxDuplicateCount = 1; - else if (TailDupSize == 0) + bool OptForSize = MF->getFunction().hasOptSize() || + llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI); + if (TailDupSize == 0) MaxDuplicateCount = TailDuplicateSize; else MaxDuplicateCount = TailDupSize; + if (OptForSize) + MaxDuplicateCount = 1; // If the block to be duplicated ends in an unanalyzable fallthrough, don't // duplicate it. 
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp index 9eeacc2584cb..bc59be890c97 100644 --- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -60,6 +60,19 @@ bool TargetFrameLowering::needsFrameIndexResolution( return MF.getFrameInfo().hasStackObjects(); } +void TargetFrameLowering::getCalleeSaves(const MachineFunction &MF, + BitVector &CalleeSaves) const { + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + CalleeSaves.resize(TRI.getNumRegs()); + + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) + CalleeSaves.set(Info.getReg()); +} + void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const { diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 6cae3b869501..a98c627dab09 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/StackMaps.h" @@ -1015,19 +1016,16 @@ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, } // Default implementation of CreateTargetMIHazardRecognizer. -ScheduleHazardRecognizer *TargetInstrInfo:: -CreateTargetMIHazardRecognizer(const InstrItineraryData *II, - const ScheduleDAG *DAG) const { - return (ScheduleHazardRecognizer *) - new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler"); +ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetMIHazardRecognizer( + const InstrItineraryData *II, const ScheduleDAGMI *DAG) const { + return new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler"); } // Default implementation of CreateTargetPostRAHazardRecognizer. ScheduleHazardRecognizer *TargetInstrInfo:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { - return (ScheduleHazardRecognizer *) - new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); + return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } //===----------------------------------------------------------------------===// @@ -1121,18 +1119,64 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, } Optional<ParamLoadedValue> -TargetInstrInfo::describeLoadedValue(const MachineInstr &MI) const { +TargetInstrInfo::describeLoadedValue(const MachineInstr &MI, + Register Reg) const { const MachineFunction *MF = MI.getMF(); - const MachineOperand *Op = nullptr; - DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});; - const MachineOperand *SrcRegOp, *DestRegOp; - - if (isCopyInstr(MI, SrcRegOp, DestRegOp)) { - Op = SrcRegOp; - return ParamLoadedValue(*Op, Expr); - } else if (MI.isMoveImmediate()) { - Op = &MI.getOperand(1); - return ParamLoadedValue(*Op, Expr); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {}); + int64_t Offset; + + // To simplify the sub-register handling, verify that we only need to + // consider physical registers. 
+ assert(MF->getProperties().hasProperty( + MachineFunctionProperties::Property::NoVRegs)); + + if (auto DestSrc = isCopyInstr(MI)) { + Register DestReg = DestSrc->Destination->getReg(); + + if (Reg == DestReg) + return ParamLoadedValue(*DestSrc->Source, Expr); + + // Cases where super- or sub-registers needs to be described should + // be handled by the target's hook implementation. + assert(!TRI->isSuperOrSubRegisterEq(Reg, DestReg) && + "TargetInstrInfo::describeLoadedValue can't describe super- or " + "sub-regs for copy instructions"); + return None; + } else if (auto RegImm = isAddImmediate(MI, Reg)) { + Register SrcReg = RegImm->Reg; + Offset = RegImm->Imm; + Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset); + return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr); + } else if (MI.hasOneMemOperand()) { + // Only describe memory which provably does not escape the function. As + // described in llvm.org/PR43343, escaped memory may be clobbered by the + // callee (or by another thread). + const auto &TII = MF->getSubtarget().getInstrInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + const MachineMemOperand *MMO = MI.memoperands()[0]; + const PseudoSourceValue *PSV = MMO->getPseudoValue(); + + // If the address points to "special" memory (e.g. a spill slot), it's + // sufficient to check that it isn't aliased by any high-level IR value. + if (!PSV || PSV->mayAlias(&MFI)) + return None; + + const MachineOperand *BaseOp; + if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI)) + return None; + + assert(MI.getNumExplicitDefs() == 1 && + "Can currently only handle mem instructions with a single define"); + + // TODO: In what way do we need to take Reg into consideration here? + + SmallVector<uint64_t, 8> Ops; + DIExpression::appendOffset(Ops, Offset); + Ops.push_back(dwarf::DW_OP_deref_size); + Ops.push_back(MMO->getSize()); + Expr = DIExpression::prependOpcodes(Expr, Ops); + return ParamLoadedValue(*BaseOp, Expr); } return None; diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 9b23012f47e3..e5a7b70d82c8 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -88,6 +88,14 @@ static cl::opt<unsigned> OptsizeJumpTableDensity( cl::desc("Minimum density for building a jump table in " "an optsize function")); +// FIXME: This option is only to test if the strict fp operation processed +// correctly by preventing mutating strict fp operation to normal fp operation +// during development. When the backend supports strict float operation, this +// option will be meaningless. +static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation", + cl::desc("Don't mutate strict-float node to a legalize node"), + cl::init(false), cl::Hidden); + static bool darwinHasSinCos(const Triple &TT) { assert(TT.isOSDarwin() && "should be called with darwin triple"); // Don't bother with 32 bit x86. @@ -148,7 +156,6 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::OLE_F128, "__lekf2"); setLibcallName(RTLIB::OGT_F128, "__gtkf2"); setLibcallName(RTLIB::UO_F128, "__unordkf2"); - setLibcallName(RTLIB::O_F128, "__unordkf2"); } // A few names are different on particular architectures or environments. 
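Usage note for the new TargetFrameLowering::getCalleeSaves() hook added earlier in this diff (a hypothetical call site, not part of this commit; isCalleeSavedInFunction is a made-up helper name):

#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"

// Returns true if PhysReg is saved/restored by MF's prologue/epilogue.
// Until the callee-saved info is finalised, all bits stay clear and this
// simply returns false.
static bool isCalleeSavedInFunction(const llvm::MachineFunction &MF,
                                    unsigned PhysReg) {
  llvm::BitVector CalleeSaves;
  MF.getSubtarget().getFrameLowering()->getCalleeSaves(MF, CalleeSaves);
  return PhysReg < CalleeSaves.size() && CalleeSaves.test(PhysReg);
}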
@@ -556,10 +563,6 @@ static void InitCmpLibcallCCs(ISD::CondCode *CCs) { CCs[RTLIB::UO_F64] = ISD::SETNE; CCs[RTLIB::UO_F128] = ISD::SETNE; CCs[RTLIB::UO_PPCF128] = ISD::SETNE; - CCs[RTLIB::O_F32] = ISD::SETEQ; - CCs[RTLIB::O_F64] = ISD::SETEQ; - CCs[RTLIB::O_F128] = ISD::SETEQ; - CCs[RTLIB::O_PPCF128] = ISD::SETEQ; } /// NOTE: The TargetMachine owns TLOF. @@ -572,8 +575,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { MaxGluedStoresPerMemcpy = 0; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4; - UseUnderscoreSetJmp = false; - UseUnderscoreLongJmp = false; HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; JumpIsExpensive = JumpIsExpensiveOverride; @@ -585,6 +586,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { BooleanVectorContents = UndefinedBooleanContent; SchedPreferenceInfo = Sched::ILP; GatherAllAliasesMaxDepth = 18; + IsStrictFPEnabled = DisableStrictNodeMutation; // TODO: the default will be switched to 0 in the next commit, along // with the Target-specific changes necessary. MaxAtomicSizeInBitsSupported = 1024; @@ -624,6 +626,8 @@ void TargetLoweringBase::initActions() { IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { setIndexedLoadAction(IM, VT, Expand); setIndexedStoreAction(IM, VT, Expand); + setIndexedMaskedLoadAction(IM, VT, Expand); + setIndexedMaskedStoreAction(IM, VT, Expand); } // Most backends expect to see the node which just returns the value loaded. @@ -654,6 +658,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULFIXSAT, VT, Expand); setOperationAction(ISD::UMULFIX, VT, Expand); setOperationAction(ISD::UMULFIXSAT, VT, Expand); + setOperationAction(ISD::SDIVFIX, VT, Expand); + setOperationAction(ISD::UDIVFIX, VT, Expand); // Overflow operations default to expand setOperationAction(ISD::SADDO, VT, Expand); @@ -687,6 +693,7 @@ void TargetLoweringBase::initActions() { // These operations default to expand for vector types. if (VT.isVector()) { setOperationAction(ISD::FCOPYSIGN, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); @@ -694,38 +701,9 @@ void TargetLoweringBase::initActions() { } // Constrained floating-point operations default to expand. 
- setOperationAction(ISD::STRICT_FADD, VT, Expand); - setOperationAction(ISD::STRICT_FSUB, VT, Expand); - setOperationAction(ISD::STRICT_FMUL, VT, Expand); - setOperationAction(ISD::STRICT_FDIV, VT, Expand); - setOperationAction(ISD::STRICT_FREM, VT, Expand); - setOperationAction(ISD::STRICT_FMA, VT, Expand); - setOperationAction(ISD::STRICT_FSQRT, VT, Expand); - setOperationAction(ISD::STRICT_FPOW, VT, Expand); - setOperationAction(ISD::STRICT_FPOWI, VT, Expand); - setOperationAction(ISD::STRICT_FSIN, VT, Expand); - setOperationAction(ISD::STRICT_FCOS, VT, Expand); - setOperationAction(ISD::STRICT_FEXP, VT, Expand); - setOperationAction(ISD::STRICT_FEXP2, VT, Expand); - setOperationAction(ISD::STRICT_FLOG, VT, Expand); - setOperationAction(ISD::STRICT_FLOG10, VT, Expand); - setOperationAction(ISD::STRICT_FLOG2, VT, Expand); - setOperationAction(ISD::STRICT_LRINT, VT, Expand); - setOperationAction(ISD::STRICT_LLRINT, VT, Expand); - setOperationAction(ISD::STRICT_FRINT, VT, Expand); - setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand); - setOperationAction(ISD::STRICT_FCEIL, VT, Expand); - setOperationAction(ISD::STRICT_FFLOOR, VT, Expand); - setOperationAction(ISD::STRICT_LROUND, VT, Expand); - setOperationAction(ISD::STRICT_LLROUND, VT, Expand); - setOperationAction(ISD::STRICT_FROUND, VT, Expand); - setOperationAction(ISD::STRICT_FTRUNC, VT, Expand); - setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand); - setOperationAction(ISD::STRICT_FMINNUM, VT, Expand); - setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand); - setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand); - setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand); - setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand); +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + setOperationAction(ISD::STRICT_##DAGN, VT, Expand); +#include "llvm/IR/ConstrainedOps.def" // For most targets @llvm.get.dynamic.area.offset just returns 0. setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); @@ -1332,8 +1310,11 @@ void TargetLoweringBase::computeRegisterProperties( MVT IntermediateVT; MVT RegisterVT; unsigned NumIntermediates; - NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT, + unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, RegisterVT, this); + NumRegistersForVT[i] = NumRegisters; + assert(NumRegistersForVT[i] == NumRegisters && + "NumRegistersForVT size cannot represent NumRegisters!"); RegisterTypeForVT[i] = RegisterVT; MVT NVT = VT.getPow2VectorType(); @@ -1456,6 +1437,28 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT return NumVectorRegs; } +bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI, + uint64_t NumCases, + uint64_t Range, + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) const { + // FIXME: This function check the maximum table size and density, but the + // minimum size is not checked. It would be nice if the minimum size is + // also combined within this function. Currently, the minimum size check is + // performed in findJumpTable() in SelectionDAGBuiler and + // getEstimatedNumberOfCaseClusters() in BasicTTIImpl. 
+ const bool OptForSize = + SI->getParent()->getParent()->hasOptSize() || + llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI); + const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); + const unsigned MaxJumpTableSize = getMaximumJumpTableSize(); + + // Check whether the number of cases is small enough and + // the range is dense enough for a jump table. + return (OptForSize || Range <= MaxJumpTableSize) && + (NumCases * 100 >= Range * MinDensity); +} + /// Get the EVTs and ArgFlags collections that represent the legalized return /// type of the given function. This does not require a DAG or a return value, /// and is suitable for use before any DAGs for the function are constructed. @@ -1641,6 +1644,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case ExtractValue: return ISD::MERGE_VALUES; case InsertValue: return ISD::MERGE_VALUES; case LandingPad: return 0; + case Freeze: return 0; } llvm_unreachable("Unknown instruction type encountered!"); diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 4978f4b9500b..8cb9814300d1 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -273,7 +273,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, Streamer.SwitchSection(S); - for (const auto &Operand : LinkerOptions->operands()) { + for (const auto *Operand : LinkerOptions->operands()) { if (cast<MDNode>(Operand)->getNumOperands() != 2) report_fatal_error("invalid llvm.linker.options"); for (const auto &Option : cast<MDNode>(Operand)->operands()) { @@ -289,7 +289,7 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer, Streamer.SwitchSection(S); - for (const auto &Operand : DependentLibraries->operands()) { + for (const auto *Operand : DependentLibraries->operands()) { Streamer.EmitBytes( cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString()); Streamer.EmitIntValue(0, 1); @@ -885,7 +885,7 @@ void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer, Module &M) const { // Emit the linker options if present. if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { - for (const auto &Option : LinkerOptions->operands()) { + for (const auto *Option : LinkerOptions->operands()) { SmallVector<std::string, 4> StrOptions; for (const auto &Piece : cast<MDNode>(Option)->operands()) StrOptions.push_back(cast<MDString>(Piece)->getString()); @@ -1449,7 +1449,7 @@ void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer, // linker. MCSection *Sec = getDrectveSection(); Streamer.SwitchSection(Sec); - for (const auto &Option : LinkerOptions->operands()) { + for (const auto *Option : LinkerOptions->operands()) { for (const auto &Piece : cast<MDNode>(Option)->operands()) { // Lead with a space for consistency with our dllexport implementation. 
std::string Directive(" "); @@ -1849,18 +1849,66 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal( SC, Kind, /* BeginSymbolName */ nullptr); } + if (Kind.isMergeableCString()) { + if (!Kind.isMergeable1ByteCString()) + report_fatal_error("Unhandled multi-byte mergeable string kind."); + + unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GO)); + + unsigned EntrySize = getEntrySizeForKind(Kind); + std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; + SmallString<128> Name; + Name = SizeSpec + utostr(Align); + + return getContext().getXCOFFSection( + Name, XCOFF::XMC_RO, XCOFF::XTY_SD, + TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO), + Kind, /* BeginSymbolName */ nullptr); + } + if (Kind.isText()) return TextSection; - if (Kind.isData()) + if (Kind.isData() || Kind.isReadOnlyWithRel()) + // TODO: We may put this under option control, because user may want to + // have read-only data with relocations placed into a read-only section by + // the compiler. + return DataSection; + + // Zero initialized data must be emitted to the .data section because external + // linkage control sections that get mapped to the .bss section will be linked + // as tentative defintions, which is only appropriate for SectionKind::Common. + if (Kind.isBSS()) return DataSection; + if (Kind.isReadOnly()) + return ReadOnlySection; + report_fatal_error("XCOFF other section types not yet implemented."); } +MCSection *TargetLoweringObjectFileXCOFF::getSectionForJumpTable( + const Function &F, const TargetMachine &TM) const { + assert (!TM.getFunctionSections() && "Unique sections not supported on XCOFF" + " yet."); + assert (!F.getComdat() && "Comdat not supported on XCOFF."); + //TODO: Enable emiting jump table to unique sections when we support it. + return ReadOnlySection; +} + bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection( bool UsesLabelDifference, const Function &F) const { - report_fatal_error("TLOF XCOFF not yet implemented."); + return false; +} + +/// Given a mergeable constant with the specified size and relocation +/// information, return a section that it should be placed in. +MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C, + unsigned &Align) const { + //TODO: Enable emiting constant pool to unique sections when we support it. + return ReadOnlySection; } void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx, @@ -1891,6 +1939,7 @@ XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal( const GlobalObject *GO) { switch (GO->getLinkage()) { case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: return XCOFF::C_HIDEXT; case GlobalValue::ExternalLinkage: case GlobalValue::CommonLinkage: diff --git a/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/llvm/lib/CodeGen/TargetOptionsImpl.cpp index 039748d817ca..d794a261ecb2 100644 --- a/llvm/lib/CodeGen/TargetOptionsImpl.cpp +++ b/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -28,20 +28,8 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { const Function &F = MF.getFunction(); - // TODO: Remove support for old `fp elim` function attributes after fully - // migrate to use "frame-pointer" - if (!F.hasFnAttribute("frame-pointer")) { - // Check to see if we should eliminate all frame pointers. 
- if (F.getFnAttribute("no-frame-pointer-elim").getValueAsString() == "true") - return true; - - // Check to see if we should eliminate non-leaf frame pointers. - if (F.hasFnAttribute("no-frame-pointer-elim-non-leaf")) - return MF.getFrameInfo().hasCalls(); - + if (!F.hasFnAttribute("frame-pointer")) return false; - } - StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString(); if (FP == "all") return true; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index f1f4f65adf7c..41cb511ad9b4 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Pass.h" @@ -38,8 +39,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Threading.h" #include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/Threading.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" @@ -178,10 +179,10 @@ static cl::opt<CFLAAType> UseCFLAA( /// Option names for limiting the codegen pipeline. /// Those are used in error reporting and we didn't want /// to duplicate their names all over the place. -static const char *StartAfterOptName = "start-after"; -static const char *StartBeforeOptName = "start-before"; -static const char *StopAfterOptName = "stop-after"; -static const char *StopBeforeOptName = "stop-before"; +static const char StartAfterOptName[] = "start-after"; +static const char StartBeforeOptName[] = "start-before"; +static const char StopAfterOptName[] = "stop-after"; +static const char StopBeforeOptName[] = "stop-before"; static cl::opt<std::string> StartAfterOpt(StringRef(StartAfterOptName), diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp index 59eb2f9c88cb..63766df4d2be 100644 --- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -54,6 +54,10 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const { return getSchedModel().PostRAScheduler; } +bool TargetSubtargetInfo::enablePostRAMachineScheduler() const { + return enableMachineScheduler() && enablePostRAScheduler(); +} + bool TargetSubtargetInfo::useAA() const { return false; } diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index ea971809d4e4..2b1ffab74b6f 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1287,7 +1287,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); // If the instruction is convertible to 3 Addr, instead - // of returning try 3 Addr transformation aggresively and + // of returning try 3 Addr transformation aggressively and // use this variable to check later. Because it might be better. // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret` // instead of the following code. 
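Editorial note on the TargetOptionsImpl.cpp hunk above: only the consolidated "frame-pointer" function attribute is consulted now, and the legacy "no-frame-pointer-elim" attributes are ignored. The hunk cuts off after the "all" case, so the following self-contained sketch of the resulting decision fills in the "non-leaf"/"none" handling as an assumption about the surrounding code rather than text copied from this commit (disableFPElimSketch is a made-up name):

#include "llvm/IR/Function.h"

// Approximates TargetOptions::DisableFramePointerElim() after the change:
//   "frame-pointer"="all"      -> always keep the frame pointer
//   "frame-pointer"="non-leaf" -> keep it only if the function makes calls
//   "frame-pointer"="none" or attribute absent -> allow elimination
static bool disableFPElimSketch(const llvm::Function &F, bool HasCalls) {
  if (!F.hasFnAttribute("frame-pointer"))
    return false;
  llvm::StringRef FP = F.getFnAttribute("frame-pointer").getValueAsString();
  if (FP == "all")
    return true;
  if (FP == "non-leaf")
    return HasCalls;
  return false;
}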
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp new file mode 100644 index 000000000000..4522484222f5 --- /dev/null +++ b/llvm/lib/CodeGen/TypePromotion.cpp @@ -0,0 +1,1011 @@ +//===----- TypePromotion.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This is an opcode based type promotion pass for small types that would +/// otherwise be promoted during legalisation. This works around the limitations +/// of selection dag for cyclic regions. The search begins from icmp +/// instructions operands where a tree, consisting of non-wrapping or safe +/// wrapping instructions, is built, checked and promoted if possible. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsARM.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" + +#define DEBUG_TYPE "type-promotion" +#define PASS_NAME "Type Promotion" + +using namespace llvm; + +static cl::opt<bool> +DisablePromotion("disable-type-promotion", cl::Hidden, cl::init(false), + cl::desc("Disable type promotion pass")); + +// The goal of this pass is to enable more efficient code generation for +// operations on narrow types (i.e. types with < 32-bits) and this is a +// motivating IR code example: +// +// define hidden i32 @cmp(i8 zeroext) { +// %2 = add i8 %0, -49 +// %3 = icmp ult i8 %2, 3 +// .. +// } +// +// The issue here is that i8 is type-legalized to i32 because i8 is not a +// legal type. Thus, arithmetic is done in integer-precision, but then the +// byte value is masked out as follows: +// +// t19: i32 = add t4, Constant:i32<-49> +// t24: i32 = and t19, Constant:i32<255> +// +// Consequently, we generate code like this: +// +// subs r0, #49 +// uxtb r1, r0 +// cmp r1, #3 +// +// This shows that masking out the byte value results in generation of +// the UXTB instruction. This is not optimal as r0 already contains the byte +// value we need, and so instead we can just generate: +// +// sub.w r1, r0, #49 +// cmp r1, #3 +// +// We achieve this by type promoting the IR to i32 like so for this example: +// +// define i32 @cmp(i8 zeroext %c) { +// %0 = zext i8 %c to i32 +// %c.off = add i32 %0, -49 +// %1 = icmp ult i32 %c.off, 3 +// .. +// } +// +// For this to be valid and legal, we need to prove that the i32 add is +// producing the same value as the i8 addition, and that e.g. no overflow +// happens. 
+// +// A brief sketch of the algorithm and some terminology. +// We pattern match interesting IR patterns: +// - which have "sources": instructions producing narrow values (i8, i16), and +// - they have "sinks": instructions consuming these narrow values. +// +// We collect all instruction connecting sources and sinks in a worklist, so +// that we can mutate these instruction and perform type promotion when it is +// legal to do so. + +namespace { +class IRPromoter { + LLVMContext &Ctx; + IntegerType *OrigTy = nullptr; + unsigned PromotedWidth = 0; + SetVector<Value*> &Visited; + SetVector<Value*> &Sources; + SetVector<Instruction*> &Sinks; + SmallVectorImpl<Instruction*> &SafeWrap; + IntegerType *ExtTy = nullptr; + SmallPtrSet<Value*, 8> NewInsts; + SmallPtrSet<Instruction*, 4> InstsToRemove; + DenseMap<Value*, SmallVector<Type*, 4>> TruncTysMap; + SmallPtrSet<Value*, 8> Promoted; + + void ReplaceAllUsersOfWith(Value *From, Value *To); + void PrepareWrappingAdds(void); + void ExtendSources(void); + void ConvertTruncs(void); + void PromoteTree(void); + void TruncateSinks(void); + void Cleanup(void); + +public: + IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width, + SetVector<Value*> &visited, SetVector<Value*> &sources, + SetVector<Instruction*> &sinks, + SmallVectorImpl<Instruction*> &wrap) : + Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited), + Sources(sources), Sinks(sinks), SafeWrap(wrap) { + ExtTy = IntegerType::get(Ctx, PromotedWidth); + assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() + && "Original type not smaller than extended type"); + } + + void Mutate(); +}; + +class TypePromotion : public FunctionPass { + unsigned TypeSize = 0; + LLVMContext *Ctx = nullptr; + unsigned RegisterBitWidth = 0; + SmallPtrSet<Value*, 16> AllVisited; + SmallPtrSet<Instruction*, 8> SafeToPromote; + SmallVector<Instruction*, 4> SafeWrap; + + // Does V have the same size result type as TypeSize. + bool EqualTypeSize(Value *V); + // Does V have the same size, or narrower, result type as TypeSize. + bool LessOrEqualTypeSize(Value *V); + // Does V have a result type that is wider than TypeSize. + bool GreaterThanTypeSize(Value *V); + // Does V have a result type that is narrower than TypeSize. + bool LessThanTypeSize(Value *V); + // Should V be a leaf in the promote tree? + bool isSource(Value *V); + // Should V be a root in the promotion tree? + bool isSink(Value *V); + // Should we change the result type of V? It will result in the users of V + // being visited. + bool shouldPromote(Value *V); + // Is I an add or a sub, which isn't marked as nuw, but where a wrapping + // result won't affect the computation? + bool isSafeWrap(Instruction *I); + // Can V have its integer type promoted, or can the type be ignored. + bool isSupportedType(Value *V); + // Is V an instruction with a supported opcode or another value that we can + // handle, such as constants and basic blocks. + bool isSupportedValue(Value *V); + // Is V an instruction thats result can trivially promoted, or has safe + // wrapping. 
+ bool isLegalToPromote(Value *V); + bool TryToPromote(Value *V, unsigned PromotedWidth); + +public: + static char ID; + + TypePromotion() : FunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<TargetPassConfig>(); + } + + StringRef getPassName() const override { return PASS_NAME; } + + bool runOnFunction(Function &F) override; +}; + +} + +static bool GenerateSignBits(Value *V) { + if (!isa<Instruction>(V)) + return false; + + unsigned Opc = cast<Instruction>(V)->getOpcode(); + return Opc == Instruction::AShr || Opc == Instruction::SDiv || + Opc == Instruction::SRem || Opc == Instruction::SExt; +} + +bool TypePromotion::EqualTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() == TypeSize; +} + +bool TypePromotion::LessOrEqualTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() <= TypeSize; +} + +bool TypePromotion::GreaterThanTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() > TypeSize; +} + +bool TypePromotion::LessThanTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() < TypeSize; +} + +/// Return true if the given value is a source in the use-def chain, producing +/// a narrow 'TypeSize' value. These values will be zext to start the promotion +/// of the tree to i32. We guarantee that these won't populate the upper bits +/// of the register. ZExt on the loads will be free, and the same for call +/// return values because we only accept ones that guarantee a zeroext ret val. +/// Many arguments will have the zeroext attribute too, so those would be free +/// too. +bool TypePromotion::isSource(Value *V) { + if (!isa<IntegerType>(V->getType())) + return false; + + // TODO Allow zext to be sources. + if (isa<Argument>(V)) + return true; + else if (isa<LoadInst>(V)) + return true; + else if (isa<BitCastInst>(V)) + return true; + else if (auto *Call = dyn_cast<CallInst>(V)) + return Call->hasRetAttr(Attribute::AttrKind::ZExt); + else if (auto *Trunc = dyn_cast<TruncInst>(V)) + return EqualTypeSize(Trunc); + return false; +} + +/// Return true if V will require any promoted values to be truncated for the +/// the IR to remain valid. We can't mutate the value type of these +/// instructions. +bool TypePromotion::isSink(Value *V) { + // TODO The truncate also isn't actually necessary because we would already + // proved that the data value is kept within the range of the original data + // type. + + // Sinks are: + // - points where the value in the register is being observed, such as an + // icmp, switch or store. + // - points where value types have to match, such as calls and returns. + // - zext are included to ease the transformation and are generally removed + // later on. + if (auto *Store = dyn_cast<StoreInst>(V)) + return LessOrEqualTypeSize(Store->getValueOperand()); + if (auto *Return = dyn_cast<ReturnInst>(V)) + return LessOrEqualTypeSize(Return->getReturnValue()); + if (auto *ZExt = dyn_cast<ZExtInst>(V)) + return GreaterThanTypeSize(ZExt); + if (auto *Switch = dyn_cast<SwitchInst>(V)) + return LessThanTypeSize(Switch->getCondition()); + if (auto *ICmp = dyn_cast<ICmpInst>(V)) + return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0)); + + return isa<CallInst>(V); +} + +/// Return whether this instruction can safely wrap. +bool TypePromotion::isSafeWrap(Instruction *I) { + // We can support a, potentially, wrapping instruction (I) if: + // - It is only used by an unsigned icmp. + // - The icmp uses a constant. 
+ // - The wrapping value (I) is decreasing, i.e would underflow - wrapping + // around zero to become a larger number than before. + // - The wrapping instruction (I) also uses a constant. + // + // We can then use the two constants to calculate whether the result would + // wrap in respect to itself in the original bitwidth. If it doesn't wrap, + // just underflows the range, the icmp would give the same result whether the + // result has been truncated or not. We calculate this by: + // - Zero extending both constants, if needed, to 32-bits. + // - Take the absolute value of I's constant, adding this to the icmp const. + // - Check that this value is not out of range for small type. If it is, it + // means that it has underflowed enough to wrap around the icmp constant. + // + // For example: + // + // %sub = sub i8 %a, 2 + // %cmp = icmp ule i8 %sub, 254 + // + // If %a = 0, %sub = -2 == FE == 254 + // But if this is evalulated as a i32 + // %sub = -2 == FF FF FF FE == 4294967294 + // So the unsigned compares (i8 and i32) would not yield the same result. + // + // Another way to look at it is: + // %a - 2 <= 254 + // %a + 2 <= 254 + 2 + // %a <= 256 + // And we can't represent 256 in the i8 format, so we don't support it. + // + // Whereas: + // + // %sub i8 %a, 1 + // %cmp = icmp ule i8 %sub, 254 + // + // If %a = 0, %sub = -1 == FF == 255 + // As i32: + // %sub = -1 == FF FF FF FF == 4294967295 + // + // In this case, the unsigned compare results would be the same and this + // would also be true for ult, uge and ugt: + // - (255 < 254) == (0xFFFFFFFF < 254) == false + // - (255 <= 254) == (0xFFFFFFFF <= 254) == false + // - (255 > 254) == (0xFFFFFFFF > 254) == true + // - (255 >= 254) == (0xFFFFFFFF >= 254) == true + // + // To demonstrate why we can't handle increasing values: + // + // %add = add i8 %a, 2 + // %cmp = icmp ult i8 %add, 127 + // + // If %a = 254, %add = 256 == (i8 1) + // As i32: + // %add = 256 + // + // (1 < 127) != (256 < 127) + + unsigned Opc = I->getOpcode(); + if (Opc != Instruction::Add && Opc != Instruction::Sub) + return false; + + if (!I->hasOneUse() || + !isa<ICmpInst>(*I->user_begin()) || + !isa<ConstantInt>(I->getOperand(1))) + return false; + + ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1)); + bool NegImm = OverflowConst->isNegative(); + bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) || + ((Opc == Instruction::Add) && NegImm); + if (!IsDecreasing) + return false; + + // Don't support an icmp that deals with sign bits. + auto *CI = cast<ICmpInst>(*I->user_begin()); + if (CI->isSigned() || CI->isEquality()) + return false; + + ConstantInt *ICmpConst = nullptr; + if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0))) + ICmpConst = Const; + else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1))) + ICmpConst = Const; + else + return false; + + // Now check that the result can't wrap on itself. + APInt Total = ICmpConst->getValue().getBitWidth() < 32 ? + ICmpConst->getValue().zext(32) : ICmpConst->getValue(); + + Total += OverflowConst->getValue().getBitWidth() < 32 ? 
+ OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs(); + + APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize); + + if (Total.getBitWidth() > Max.getBitWidth()) { + if (Total.ugt(Max.zext(Total.getBitWidth()))) + return false; + } else if (Max.getBitWidth() > Total.getBitWidth()) { + if (Total.zext(Max.getBitWidth()).ugt(Max)) + return false; + } else if (Total.ugt(Max)) + return false; + + LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for " + << *I << "\n"); + SafeWrap.push_back(I); + return true; +} + +bool TypePromotion::shouldPromote(Value *V) { + if (!isa<IntegerType>(V->getType()) || isSink(V)) + return false; + + if (isSource(V)) + return true; + + auto *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + if (isa<ICmpInst>(I)) + return false; + + return true; +} + +/// Return whether we can safely mutate V's type to ExtTy without having to be +/// concerned with zero extending or truncation. +static bool isPromotedResultSafe(Value *V) { + if (GenerateSignBits(V)) + return false; + + if (!isa<Instruction>(V)) + return true; + + if (!isa<OverflowingBinaryOperator>(V)) + return true; + + return cast<Instruction>(V)->hasNoUnsignedWrap(); +} + +void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { + SmallVector<Instruction*, 4> Users; + Instruction *InstTo = dyn_cast<Instruction>(To); + bool ReplacedAll = true; + + LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To + << "\n"); + + for (Use &U : From->uses()) { + auto *User = cast<Instruction>(U.getUser()); + if (InstTo && User->isIdenticalTo(InstTo)) { + ReplacedAll = false; + continue; + } + Users.push_back(User); + } + + for (auto *U : Users) + U->replaceUsesOfWith(From, To); + + if (ReplacedAll) + if (auto *I = dyn_cast<Instruction>(From)) + InstsToRemove.insert(I); +} + +void IRPromoter::PrepareWrappingAdds() { + LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n"); + IRBuilder<> Builder{Ctx}; + + // For adds that safely wrap and use a negative immediate as operand 1, we + // create an equivalent instruction using a positive immediate. + // That positive immediate can then be zext along with all the other + // immediates later. 
+ for (auto *I : SafeWrap) { + if (I->getOpcode() != Instruction::Add) + continue; + + LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n"); + assert((isa<ConstantInt>(I->getOperand(1)) && + cast<ConstantInt>(I->getOperand(1))->isNegative()) && + "Wrapping should have a negative immediate as the second operand"); + + auto Const = cast<ConstantInt>(I->getOperand(1)); + auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); + Builder.SetInsertPoint(I); + Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst); + if (auto *NewInst = dyn_cast<Instruction>(NewVal)) { + NewInst->copyIRFlags(I); + NewInsts.insert(NewInst); + } + InstsToRemove.insert(I); + I->replaceAllUsesWith(NewVal); + LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n"); + } + for (auto *I : NewInsts) + Visited.insert(I); +} + +void IRPromoter::ExtendSources() { + IRBuilder<> Builder{Ctx}; + + auto InsertZExt = [&](Value *V, Instruction *InsertPt) { + assert(V->getType() != ExtTy && "zext already extends to i32"); + LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n"); + Builder.SetInsertPoint(InsertPt); + if (auto *I = dyn_cast<Instruction>(V)) + Builder.SetCurrentDebugLocation(I->getDebugLoc()); + + Value *ZExt = Builder.CreateZExt(V, ExtTy); + if (auto *I = dyn_cast<Instruction>(ZExt)) { + if (isa<Argument>(V)) + I->moveBefore(InsertPt); + else + I->moveAfter(InsertPt); + NewInsts.insert(I); + } + + ReplaceAllUsersOfWith(V, ZExt); + }; + + // Now, insert extending instructions between the sources and their users. + LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n"); + for (auto V : Sources) { + LLVM_DEBUG(dbgs() << " - " << *V << "\n"); + if (auto *I = dyn_cast<Instruction>(V)) + InsertZExt(I, I); + else if (auto *Arg = dyn_cast<Argument>(V)) { + BasicBlock &BB = Arg->getParent()->front(); + InsertZExt(Arg, &*BB.getFirstInsertionPt()); + } else { + llvm_unreachable("unhandled source that needs extending"); + } + Promoted.insert(V); + } +} + +void IRPromoter::PromoteTree() { + LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n"); + + IRBuilder<> Builder{Ctx}; + + // Mutate the types of the instructions within the tree. Here we handle + // constant operands. + for (auto *V : Visited) { + if (Sources.count(V)) + continue; + + auto *I = cast<Instruction>(V); + if (Sinks.count(I)) + continue; + + for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) { + Value *Op = I->getOperand(i); + if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType())) + continue; + + if (auto *Const = dyn_cast<ConstantInt>(Op)) { + Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy); + I->setOperand(i, NewConst); + } else if (isa<UndefValue>(Op)) + I->setOperand(i, UndefValue::get(ExtTy)); + } + + // Mutate the result type, unless this is an icmp. 
+ if (!isa<ICmpInst>(I)) { + I->mutateType(ExtTy); + Promoted.insert(I); + } + } +} + +void IRPromoter::TruncateSinks() { + LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n"); + + IRBuilder<> Builder{Ctx}; + + auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* { + if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType())) + return nullptr; + + if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources.count(V)) + return nullptr; + + LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for " + << *V << "\n"); + Builder.SetInsertPoint(cast<Instruction>(V)); + auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy)); + if (Trunc) + NewInsts.insert(Trunc); + return Trunc; + }; + + // Fix up any stores or returns that use the results of the promoted + // chain. + for (auto I : Sinks) { + LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n"); + + // Handle calls separately as we need to iterate over arg operands. + if (auto *Call = dyn_cast<CallInst>(I)) { + for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) { + Value *Arg = Call->getArgOperand(i); + Type *Ty = TruncTysMap[Call][i]; + if (Instruction *Trunc = InsertTrunc(Arg, Ty)) { + Trunc->moveBefore(Call); + Call->setArgOperand(i, Trunc); + } + } + continue; + } + + // Special case switches because we need to truncate the condition. + if (auto *Switch = dyn_cast<SwitchInst>(I)) { + Type *Ty = TruncTysMap[Switch][0]; + if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) { + Trunc->moveBefore(Switch); + Switch->setCondition(Trunc); + } + continue; + } + + // Now handle the others. + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + Type *Ty = TruncTysMap[I][i]; + if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) { + Trunc->moveBefore(I); + I->setOperand(i, Trunc); + } + } + } +} + +void IRPromoter::Cleanup() { + LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n"); + // Some zexts will now have become redundant, along with their trunc + // operands, so remove them + for (auto V : Visited) { + if (!isa<ZExtInst>(V)) + continue; + + auto ZExt = cast<ZExtInst>(V); + if (ZExt->getDestTy() != ExtTy) + continue; + + Value *Src = ZExt->getOperand(0); + if (ZExt->getSrcTy() == ZExt->getDestTy()) { + LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt + << "\n"); + ReplaceAllUsersOfWith(ZExt, Src); + continue; + } + + // Unless they produce a value that is narrower than ExtTy, we can + // replace the result of the zext with the input of a newly inserted + // trunc. 
+ if (NewInsts.count(Src) && isa<TruncInst>(Src) && + Src->getType() == OrigTy) { + auto *Trunc = cast<TruncInst>(Src); + assert(Trunc->getOperand(0)->getType() == ExtTy && + "expected inserted trunc to be operating on i32"); + ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0)); + } + } + + for (auto *I : InstsToRemove) { + LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n"); + I->dropAllReferences(); + I->eraseFromParent(); + } +} + +void IRPromoter::ConvertTruncs() { + LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n"); + IRBuilder<> Builder{Ctx}; + + for (auto *V : Visited) { + if (!isa<TruncInst>(V) || Sources.count(V)) + continue; + + auto *Trunc = cast<TruncInst>(V); + Builder.SetInsertPoint(Trunc); + IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType()); + IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]); + + unsigned NumBits = DestTy->getScalarSizeInBits(); + ConstantInt *Mask = + ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue()); + Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask); + + if (auto *I = dyn_cast<Instruction>(Masked)) + NewInsts.insert(I); + + ReplaceAllUsersOfWith(Trunc, Masked); + } +} + +void IRPromoter::Mutate() { + LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains from " + << OrigTy->getBitWidth() << " to " << PromotedWidth << "-bits\n"); + + // Cache original types of the values that will likely need truncating + for (auto *I : Sinks) { + if (auto *Call = dyn_cast<CallInst>(I)) { + for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) { + Value *Arg = Call->getArgOperand(i); + TruncTysMap[Call].push_back(Arg->getType()); + } + } else if (auto *Switch = dyn_cast<SwitchInst>(I)) + TruncTysMap[I].push_back(Switch->getCondition()->getType()); + else { + for (unsigned i = 0; i < I->getNumOperands(); ++i) + TruncTysMap[I].push_back(I->getOperand(i)->getType()); + } + } + for (auto *V : Visited) { + if (!isa<TruncInst>(V) || Sources.count(V)) + continue; + auto *Trunc = cast<TruncInst>(V); + TruncTysMap[Trunc].push_back(Trunc->getDestTy()); + } + + // Convert adds using negative immediates to equivalent instructions that use + // positive constants. + PrepareWrappingAdds(); + + // Insert zext instructions between sources and their users. + ExtendSources(); + + // Promote visited instructions, mutating their types in place. + PromoteTree(); + + // Convert any truncs, that aren't sources, into AND masks. + ConvertTruncs(); + + // Insert trunc instructions for use by calls, stores etc... + TruncateSinks(); + + // Finally, remove unecessary zexts and truncs, delete old instructions and + // clear the data structures. + Cleanup(); + + LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n"); +} + +/// We disallow booleans to make life easier when dealing with icmps but allow +/// any other integer that fits in a scalar register. Void types are accepted +/// so we can handle switches. +bool TypePromotion::isSupportedType(Value *V) { + Type *Ty = V->getType(); + + // Allow voids and pointers, these won't be promoted. + if (Ty->isVoidTy() || Ty->isPointerTy()) + return true; + + if (!isa<IntegerType>(Ty) || + cast<IntegerType>(Ty)->getBitWidth() == 1 || + cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth) + return false; + + return LessOrEqualTypeSize(V); +} + +/// We accept most instructions, as well as Arguments and ConstantInsts. We +/// Disallow casts other than zext and truncs and only allow calls if their +/// return value is zeroext. 
We don't allow opcodes that can introduce sign +/// bits. +bool TypePromotion::isSupportedValue(Value *V) { + if (auto *I = dyn_cast<Instruction>(V)) { + switch (I->getOpcode()) { + default: + return isa<BinaryOperator>(I) && isSupportedType(I) && + !GenerateSignBits(I); + case Instruction::GetElementPtr: + case Instruction::Store: + case Instruction::Br: + case Instruction::Switch: + return true; + case Instruction::PHI: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + case Instruction::Trunc: + case Instruction::BitCast: + return isSupportedType(I); + case Instruction::ZExt: + return isSupportedType(I->getOperand(0)); + case Instruction::ICmp: + // Now that we allow small types than TypeSize, only allow icmp of + // TypeSize because they will require a trunc to be legalised. + // TODO: Allow icmp of smaller types, and calculate at the end + // whether the transform would be beneficial. + if (isa<PointerType>(I->getOperand(0)->getType())) + return true; + return EqualTypeSize(I->getOperand(0)); + case Instruction::Call: { + // Special cases for calls as we need to check for zeroext + // TODO We should accept calls even if they don't have zeroext, as they + // can still be sinks. + auto *Call = cast<CallInst>(I); + return isSupportedType(Call) && + Call->hasRetAttr(Attribute::AttrKind::ZExt); + } + } + } else if (isa<Constant>(V) && !isa<ConstantExpr>(V)) { + return isSupportedType(V); + } else if (isa<Argument>(V)) + return isSupportedType(V); + + return isa<BasicBlock>(V); +} + +/// Check that the type of V would be promoted and that the original type is +/// smaller than the targeted promoted type. Check that we're not trying to +/// promote something larger than our base 'TypeSize' type. +bool TypePromotion::isLegalToPromote(Value *V) { + + auto *I = dyn_cast<Instruction>(V); + if (!I) + return true; + + if (SafeToPromote.count(I)) + return true; + + if (isPromotedResultSafe(V) || isSafeWrap(I)) { + SafeToPromote.insert(I); + return true; + } + return false; +} + +bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) { + Type *OrigTy = V->getType(); + TypeSize = OrigTy->getPrimitiveSizeInBits(); + SafeToPromote.clear(); + SafeWrap.clear(); + + if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V)) + return false; + + LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from " + << TypeSize << " bits to " << PromotedWidth << "\n"); + + SetVector<Value*> WorkList; + SetVector<Value*> Sources; + SetVector<Instruction*> Sinks; + SetVector<Value*> CurrentVisited; + WorkList.insert(V); + + // Return true if V was added to the worklist as a supported instruction, + // if it was already visited, or if we don't need to explore it (e.g. + // pointer values and GEPs), and false otherwise. + auto AddLegalInst = [&](Value *V) { + if (CurrentVisited.count(V)) + return true; + + // Ignore GEPs because they don't need promoting and the constant indices + // will prevent the transformation. + if (isa<GetElementPtrInst>(V)) + return true; + + if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) { + LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n"); + return false; + } + + WorkList.insert(V); + return true; + }; + + // Iterate through, and add to, a tree of operands and users in the use-def. + while (!WorkList.empty()) { + Value *V = WorkList.back(); + WorkList.pop_back(); + if (CurrentVisited.count(V)) + continue; + + // Ignore non-instructions, other than arguments. 
+ if (!isa<Instruction>(V) && !isSource(V)) + continue; + + // If we've already visited this value from somewhere, bail now because + // the tree has already been explored. + // TODO: This could limit the transform, ie if we try to promote something + // from an i8 and fail first, before trying an i16. + if (AllVisited.count(V)) + return false; + + CurrentVisited.insert(V); + AllVisited.insert(V); + + // Calls can be both sources and sinks. + if (isSink(V)) + Sinks.insert(cast<Instruction>(V)); + + if (isSource(V)) + Sources.insert(V); + + if (!isSink(V) && !isSource(V)) { + if (auto *I = dyn_cast<Instruction>(V)) { + // Visit operands of any instruction visited. + for (auto &U : I->operands()) { + if (!AddLegalInst(U)) + return false; + } + } + } + + // Don't visit users of a node which isn't going to be mutated unless its a + // source. + if (isSource(V) || shouldPromote(V)) { + for (Use &U : V->uses()) { + if (!AddLegalInst(U.getUser())) + return false; + } + } + } + + LLVM_DEBUG(dbgs() << "IR Promotion: Visited nodes:\n"; + for (auto *I : CurrentVisited) + I->dump(); + ); + + unsigned ToPromote = 0; + unsigned NonFreeArgs = 0; + SmallPtrSet<BasicBlock*, 4> Blocks; + for (auto *V : CurrentVisited) { + if (auto *I = dyn_cast<Instruction>(V)) + Blocks.insert(I->getParent()); + + if (Sources.count(V)) { + if (auto *Arg = dyn_cast<Argument>(V)) + if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr()) + ++NonFreeArgs; + continue; + } + + if (Sinks.count(cast<Instruction>(V))) + continue; + ++ToPromote; + } + + // DAG optimisations should be able to handle these cases better, especially + // for function arguments. + if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size()))) + return false; + + if (ToPromote < 2) + return false; + + IRPromoter Promoter(*Ctx, cast<IntegerType>(OrigTy), PromotedWidth, + CurrentVisited, Sources, Sinks, SafeWrap); + Promoter.Mutate(); + return true; +} + +bool TypePromotion::runOnFunction(Function &F) { + if (skipFunction(F) || DisablePromotion) + return false; + + LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n"); + + auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); + if (!TPC) + return false; + + bool MadeChange = false; + const DataLayout &DL = F.getParent()->getDataLayout(); + const TargetMachine &TM = TPC->getTM<TargetMachine>(); + const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F); + const TargetLowering *TLI = SubtargetInfo->getTargetLowering(); + const TargetTransformInfo &TII = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + RegisterBitWidth = TII.getRegisterBitWidth(false); + Ctx = &F.getParent()->getContext(); + + // Search up from icmps to try to promote their operands. 
+ for (BasicBlock &BB : F) { + for (auto &I : BB) { + if (AllVisited.count(&I)) + continue; + + if (!isa<ICmpInst>(&I)) + continue; + + auto *ICmp = cast<ICmpInst>(&I); + // Skip signed or pointer compares + if (ICmp->isSigned() || + !isa<IntegerType>(ICmp->getOperand(0)->getType())) + continue; + + LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n"); + + for (auto &Op : ICmp->operands()) { + if (auto *I = dyn_cast<Instruction>(Op)) { + EVT SrcVT = TLI->getValueType(DL, I->getType()); + if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT())) + break; + + if (TLI->getTypeAction(ICmp->getContext(), SrcVT) != + TargetLowering::TypePromoteInteger) + break; + + EVT PromotedVT = TLI->getTypeToTransformTo(ICmp->getContext(), SrcVT); + if (RegisterBitWidth < PromotedVT.getSizeInBits()) { + LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register " + << "for promoted type\n"); + break; + } + + MadeChange |= TryToPromote(I, PromotedVT.getSizeInBits()); + break; + } + } + } + LLVM_DEBUG(if (verifyFunction(F, &dbgs())) { + dbgs() << F; + report_fatal_error("Broken function after type promotion"); + }); + } + if (MadeChange) + LLVM_DEBUG(dbgs() << "After TypePromotion: " << F << "\n"); + + return MadeChange; +} + +INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) +INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false) + +char TypePromotion::ID = 0; + +FunctionPass *llvm::createTypePromotionPass() { + return new TypePromotion(); +} diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp index 3289eff71336..b770e1d94488 100644 --- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index 73b862d51c0f..41cbdf035558 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -11,6 +11,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TypeSize.h" using namespace llvm; EVT EVT::changeExtendedTypeToInteger() const { @@ -101,12 +102,12 @@ unsigned EVT::getExtendedVectorNumElements() const { return cast<VectorType>(LLVMTy)->getNumElements(); } -unsigned EVT::getExtendedSizeInBits() const { +TypeSize EVT::getExtendedSizeInBits() const { assert(isExtended() && "Type is not extended!"); if (IntegerType *ITy = dyn_cast<IntegerType>(LLVMTy)) - return ITy->getBitWidth(); + return TypeSize::Fixed(ITy->getBitWidth()); if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy)) - return VTy->getBitWidth(); + return VTy->getPrimitiveSizeInBits(); llvm_unreachable("Unrecognized extended type!"); } @@ -119,139 +120,14 @@ std::string EVT::getEVTString() const { + getVectorElementType().getEVTString(); if (isInteger()) return "i" + utostr(getSizeInBits()); + if (isFloatingPoint()) + return "f" + utostr(getSizeInBits()); llvm_unreachable("Invalid EVT!"); - case MVT::i1: return "i1"; - case MVT::i8: return "i8"; - case MVT::i16: return "i16"; - case MVT::i32: return "i32"; - case MVT::i64: return "i64"; - case MVT::i128: return "i128"; - case MVT::f16: return "f16"; - case MVT::f32: return "f32"; - case MVT::f64: return "f64"; - case MVT::f80: return "f80"; - 
case MVT::f128: return "f128"; case MVT::ppcf128: return "ppcf128"; case MVT::isVoid: return "isVoid"; case MVT::Other: return "ch"; case MVT::Glue: return "glue"; case MVT::x86mmx: return "x86mmx"; - case MVT::v1i1: return "v1i1"; - case MVT::v2i1: return "v2i1"; - case MVT::v4i1: return "v4i1"; - case MVT::v8i1: return "v8i1"; - case MVT::v16i1: return "v16i1"; - case MVT::v32i1: return "v32i1"; - case MVT::v64i1: return "v64i1"; - case MVT::v128i1: return "v128i1"; - case MVT::v256i1: return "v256i1"; - case MVT::v512i1: return "v512i1"; - case MVT::v1024i1: return "v1024i1"; - case MVT::v1i8: return "v1i8"; - case MVT::v2i8: return "v2i8"; - case MVT::v4i8: return "v4i8"; - case MVT::v8i8: return "v8i8"; - case MVT::v16i8: return "v16i8"; - case MVT::v32i8: return "v32i8"; - case MVT::v64i8: return "v64i8"; - case MVT::v128i8: return "v128i8"; - case MVT::v256i8: return "v256i8"; - case MVT::v1i16: return "v1i16"; - case MVT::v2i16: return "v2i16"; - case MVT::v3i16: return "v3i16"; - case MVT::v4i16: return "v4i16"; - case MVT::v8i16: return "v8i16"; - case MVT::v16i16: return "v16i16"; - case MVT::v32i16: return "v32i16"; - case MVT::v64i16: return "v64i16"; - case MVT::v128i16: return "v128i16"; - case MVT::v1i32: return "v1i32"; - case MVT::v2i32: return "v2i32"; - case MVT::v3i32: return "v3i32"; - case MVT::v4i32: return "v4i32"; - case MVT::v5i32: return "v5i32"; - case MVT::v8i32: return "v8i32"; - case MVT::v16i32: return "v16i32"; - case MVT::v32i32: return "v32i32"; - case MVT::v64i32: return "v64i32"; - case MVT::v128i32: return "v128i32"; - case MVT::v256i32: return "v256i32"; - case MVT::v512i32: return "v512i32"; - case MVT::v1024i32:return "v1024i32"; - case MVT::v2048i32:return "v2048i32"; - case MVT::v1i64: return "v1i64"; - case MVT::v2i64: return "v2i64"; - case MVT::v4i64: return "v4i64"; - case MVT::v8i64: return "v8i64"; - case MVT::v16i64: return "v16i64"; - case MVT::v32i64: return "v32i64"; - case MVT::v1i128: return "v1i128"; - case MVT::v1f32: return "v1f32"; - case MVT::v2f32: return "v2f32"; - case MVT::v2f16: return "v2f16"; - case MVT::v3f16: return "v3f16"; - case MVT::v4f16: return "v4f16"; - case MVT::v8f16: return "v8f16"; - case MVT::v16f16: return "v16f16"; - case MVT::v32f16: return "v32f16"; - case MVT::v3f32: return "v3f32"; - case MVT::v4f32: return "v4f32"; - case MVT::v5f32: return "v5f32"; - case MVT::v8f32: return "v8f32"; - case MVT::v16f32: return "v16f32"; - case MVT::v32f32: return "v32f32"; - case MVT::v64f32: return "v64f32"; - case MVT::v128f32: return "v128f32"; - case MVT::v256f32: return "v256f32"; - case MVT::v512f32: return "v512f32"; - case MVT::v1024f32:return "v1024f32"; - case MVT::v2048f32:return "v2048f32"; - case MVT::v1f64: return "v1f64"; - case MVT::v2f64: return "v2f64"; - case MVT::v4f64: return "v4f64"; - case MVT::v8f64: return "v8f64"; - case MVT::nxv1i1: return "nxv1i1"; - case MVT::nxv2i1: return "nxv2i1"; - case MVT::nxv4i1: return "nxv4i1"; - case MVT::nxv8i1: return "nxv8i1"; - case MVT::nxv16i1: return "nxv16i1"; - case MVT::nxv32i1: return "nxv32i1"; - case MVT::nxv1i8: return "nxv1i8"; - case MVT::nxv2i8: return "nxv2i8"; - case MVT::nxv4i8: return "nxv4i8"; - case MVT::nxv8i8: return "nxv8i8"; - case MVT::nxv16i8: return "nxv16i8"; - case MVT::nxv32i8: return "nxv32i8"; - case MVT::nxv1i16: return "nxv1i16"; - case MVT::nxv2i16: return "nxv2i16"; - case MVT::nxv4i16: return "nxv4i16"; - case MVT::nxv8i16: return "nxv8i16"; - case MVT::nxv16i16:return "nxv16i16"; - case MVT::nxv32i16:return "nxv32i16"; - 
case MVT::nxv1i32: return "nxv1i32"; - case MVT::nxv2i32: return "nxv2i32"; - case MVT::nxv4i32: return "nxv4i32"; - case MVT::nxv8i32: return "nxv8i32"; - case MVT::nxv16i32:return "nxv16i32"; - case MVT::nxv32i32:return "nxv32i32"; - case MVT::nxv1i64: return "nxv1i64"; - case MVT::nxv2i64: return "nxv2i64"; - case MVT::nxv4i64: return "nxv4i64"; - case MVT::nxv8i64: return "nxv8i64"; - case MVT::nxv16i64:return "nxv16i64"; - case MVT::nxv32i64:return "nxv32i64"; - case MVT::nxv2f16: return "nxv2f16"; - case MVT::nxv4f16: return "nxv4f16"; - case MVT::nxv8f16: return "nxv8f16"; - case MVT::nxv1f32: return "nxv1f32"; - case MVT::nxv2f32: return "nxv2f32"; - case MVT::nxv4f32: return "nxv4f32"; - case MVT::nxv8f32: return "nxv8f32"; - case MVT::nxv16f32:return "nxv16f32"; - case MVT::nxv1f64: return "nxv1f64"; - case MVT::nxv2f64: return "nxv2f64"; - case MVT::nxv4f64: return "nxv4f64"; - case MVT::nxv8f64: return "nxv8f64"; case MVT::Metadata:return "Metadata"; case MVT::Untyped: return "Untyped"; case MVT::exnref : return "exnref"; diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp index 865a1cfbf43a..1582f12ad580 100644 --- a/llvm/lib/CodeGen/WasmEHPrepare.cpp +++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp @@ -87,6 +87,8 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp index cdf79374e974..87958a738c67 100644 --- a/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -20,17 +20,19 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp index 119c3fd1ec7f..4847a0c3e842 100644 --- a/llvm/lib/CodeGen/XRayInstrumentation.cpp +++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Target/TargetMachine.h" |