Vendor import of llvm release_80 branch r353167:vendor/llvm/llvm-release_80-r353167

https://llvm.org/svn/llvm-project/llvm/branches/release_80@353167
author: Dimitry Andric <dim@FreeBSD.org> 2019-02-05 18:38:58 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2019-02-05 18:38:58 +0000
commit: e79719ce607b6130e41e23dbdc90cc6b4e0401e6 (patch)
tree: 47bb93fafd9582425ebb778ec77002c222f64ed4
parent: 3edec5c15a78e4abba7eb9102fef3891c84ebdfb (diff)
download: src-test2-e79719ce607b6130e41e23dbdc90cc6b4e0401e6.tar.gz
src-test2-e79719ce607b6130e41e23dbdc90cc6b4e0401e6.zip
61 files changed, 1533 insertions, 261 deletions
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index 4dbc0ddaf4f0..0df6845aaa71 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -1280,7 +1280,6 @@ function(get_llvm_lit_path base_dir file_name)
   cmake_parse_arguments(ARG "ALLOW_EXTERNAL" "" "" ${ARGN})
 
   if (ARG_ALLOW_EXTERNAL)
-    set(LLVM_DEFAULT_EXTERNAL_LIT "${LLVM_EXTERNAL_LIT}")
     set (LLVM_EXTERNAL_LIT "" CACHE STRING "Command used to spawn lit")
     if ("${LLVM_EXTERNAL_LIT}" STREQUAL "")
       set(LLVM_EXTERNAL_LIT "${LLVM_DEFAULT_EXTERNAL_LIT}")
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
index db9cf5194952..2bd435ca9d29 100644
--- a/docs/ReleaseNotes.rst
+++ b/docs/ReleaseNotes.rst
@@ -48,6 +48,12 @@ Non-comprehensive list of changes in this release
   functionality.  See `Writing an LLVM Pass
   <WritingAnLLVMPass.html#setting-up-the-build-environment>`_.
 
+* For MinGW, references to data variables that might need to be imported
+  from a dll are accessed via a stub, to allow the linker to convert it to
+  a dllimport if needed.
+
+* Added support for labels as offsets in ``.reloc`` directive.
+
 .. NOTE
    If you would like to document a larger change, then you can add a
    subsection about it right here. You can copy the following boilerplate
@@ -62,17 +68,44 @@ Changes to the LLVM IR
 ----------------------
 
 
+Changes to the AArch64 Target
+-----------------------------
+
+* Added support for the ``.arch_extension`` assembler directive, just like
+  on ARM.
+
+
 Changes to the ARM Backend
 --------------------------
 
  During this release ...
 
 
+Changes to the Hexagon Target
+--------------------------
+
+* Added support for Hexagon/HVX V66 ISA.
+
 Changes to the MIPS Target
 --------------------------
 
- During this release ...
+* Improved support of GlobalISel instruction selection framework.
+
+* Implemented emission of ``R_MIPS_JALR`` and ``R_MICROMIPS_JALR``
+  relocations. These relocations provide hints to a linker for optimization
+  of jumps to protected symbols.
+
+* ORC JIT has been supported for MIPS and MIPS64 architectures.
 
+* Assembler now suggests alternative MIPS instruction mnemonics when
+  an invalid one is specified.
+
+* Improved support for MIPS N32 ABI.
+
+* Added new instructions (``pll.ps``, ``plu.ps``, ``cvt.s.pu``,
+  ``cvt.s.pl``, ``cvt.ps``, ``sigrie``).
+
+* Numerous bug fixes and code cleanups.
 
 Changes to the PowerPC Target
 -----------------------------
@@ -123,7 +156,16 @@ Changes to the DAG infrastructure
 External Open Source Projects Using LLVM 8
 ==========================================
 
-* A project...
+Zig Programming Language
+------------------------
+
+`Zig <https://ziglang.org>`_  is a system programming language intended to be
+an alternative to C. It provides high level features such as generics, compile
+time function execution, and partial evaluation, while exposing low level LLVM
+IR features such as aliases and intrinsics. Zig uses Clang to provide automatic
+import of .h symbols, including inline functions and simple macros. Zig uses
+LLD combined with lazily building compiler-rt to provide out-of-the-box
+cross-compiling for all supported targets.
 
 
 Additional Information
diff --git a/include/llvm/Support/JSON.h b/include/llvm/Support/JSON.h
index 7a04fd52bc50..2b95a174e018 100644
--- a/include/llvm/Support/JSON.h
+++ b/include/llvm/Support/JSON.h
@@ -481,6 +481,7 @@ private:
   mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
                                       std::string, json::Array, json::Object>
       Union;
+  friend bool operator==(const Value &, const Value &);
 };
 
 bool operator==(const Value &, const Value &);
diff --git a/include/llvm/Transforms/Utils/FunctionImportUtils.h b/include/llvm/Transforms/Utils/FunctionImportUtils.h
index e24398b90012..fe1355976818 100644
--- a/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -44,6 +44,11 @@ class FunctionImportGlobalProcessing {
   /// to promote any non-renamable values.
   SmallPtrSet<GlobalValue *, 8> Used;
 
+  /// Keep track of any COMDATs that require renaming (because COMDAT
+  /// leader was promoted and renamed). Maps from original COMDAT to one
+  /// with new name.
+  DenseMap<const Comdat *, Comdat *> RenamedComdats;
+
   /// Check if we should promote the given local value to global scope.
   bool shouldPromoteLocalToGlobal(const GlobalValue *SGV);
 
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8cabad4ad312..154f81f2622d 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1836,7 +1836,10 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
 
   unsigned Index = 0;
   SmallVector<TypeIndex, 8> ArgTypeIndices;
-  TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+  TypeIndex ReturnTypeIndex = TypeIndex::Void();
+  if (ReturnAndArgs.size() > Index) {
+    ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+  }
 
   // If the first argument is a pointer type and this isn't a static method,
   // treat it as the special 'this' parameter, which is encoded separately from
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1de2ffb6cfa1..18c5fe27b1a8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1956,8 +1956,10 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
 void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
   // Emit the size.
   Asm->OutStreamer->AddComment("Loc expr size");
-  Asm->emitInt16(DebugLocs.getBytes(Entry).size());
-
+  if (getDwarfVersion() >= 5)
+    Asm->EmitULEB128(DebugLocs.getBytes(Entry).size());
+  else
+    Asm->emitInt16(DebugLocs.getBytes(Entry).size());
   // Emit the entry.
   APByteStreamer Streamer(*Asm);
   emitDebugLocEntry(Streamer, Entry);
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 764a84c7e132..dc1ad953e71d 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -225,12 +225,13 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
   }
 
 #ifndef NDEBUG
-  bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata;
+  bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata ||
+                   Op.getType() == MachineOperand::MO_MCSymbol;
   // OpNo now points as the desired insertion point.  Unless this is a variadic
   // instruction, only implicit regs are allowed beyond MCID->getNumOperands().
   // RegMask operands go between the explicit and implicit operands.
   assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
-          OpNo < MCID->getNumOperands() || isMetaDataOp) &&
+          OpNo < MCID->getNumOperands() || isDebugOp) &&
          "Trying to add an operand to a machine instr that is already done!");
 #endif
 
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ff5505c97721..6af01423ca10 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16214,23 +16214,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
   // The build vector contains some number of undef elements and exactly
   // one other element. That other element must be a zero-extended scalar
   // extracted from a vector at a constant index to turn this into a shuffle.
+  // Also, require that the build vector does not implicitly truncate/extend
+  // its elements.
   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+  EVT VT = BV->getValueType(0);
   SDValue Zext = BV->getOperand(ZextElt);
   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
     return SDValue();
 
-  // The zero-extend must be a multiple of the source size.
+  // The zero-extend must be a multiple of the source size, and we must be
+  // building a vector of the same size as the source of the extract element.
   SDValue Extract = Zext.getOperand(0);
   unsigned DestSize = Zext.getValueSizeInBits();
   unsigned SrcSize = Extract.getValueSizeInBits();
-  if (DestSize % SrcSize != 0)
+  if (DestSize % SrcSize != 0 ||
+      Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
     return SDValue();
 
   // Create a shuffle mask that will combine the extracted element with zeros
   // and undefs.
-  int ZextRatio =  DestSize / SrcSize;
+  int ZextRatio = DestSize / SrcSize;
   int NumMaskElts = NumBVOps * ZextRatio;
   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
   for (int i = 0; i != NumMaskElts; ++i) {
@@ -16260,7 +16266,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
   SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
                                       ShufMask);
-  return DAG.getBitcast(BV->getValueType(0), Shuf);
+  return DAG.getBitcast(VT, Shuf);
 }
 
 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
diff --git a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index f8b5ff6ec8fb..94df6946f3ae 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -184,7 +184,8 @@ DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, unsigned *Offset,
     }
 
     if (Kind != dwarf::DW_LLE_base_address) {
-      unsigned Bytes = Data.getU16(Offset);
+      unsigned Bytes =
+          Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset);
       // A single location description describing the location of the object...
       StringRef str = Data.getData().substr(*Offset, Bytes);
       *Offset += Bytes;
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index b2eb8b09982e..27064154221f 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -469,6 +469,11 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
     }
   }
 
+  if (Name == "seh.recoverfp") {
+    NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
+    return true;
+  }
+
   return false;
 }
 
@@ -544,10 +549,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
       return true;
     }
-    if (Name == "x86.seh.recoverfp") {
-      NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
-      return true;
-    }
     break;
   }
 
diff --git a/lib/Support/JSON.cpp b/lib/Support/JSON.cpp
index d468013fb94a..07a556814915 100644
--- a/lib/Support/JSON.cpp
+++ b/lib/Support/JSON.cpp
@@ -182,6 +182,12 @@ bool operator==(const Value &L, const Value &R) {
   case Value::Boolean:
     return *L.getAsBoolean() == *R.getAsBoolean();
   case Value::Number:
+    // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323
+    // The same integer must convert to the same double, per the standard.
+    // However we see 64-vs-80-bit precision comparisons with gcc-7 -O3 -m32.
+    // So we avoid floating point promotion for exact comparisons.
+    if (L.Type == Value::T_Integer || R.Type == Value::T_Integer)
+      return L.getAsInteger() == R.getAsInteger();
     return *L.getAsNumber() == *R.getAsNumber();
   case Value::String:
     return *L.getAsString() == *R.getAsString();
diff --git a/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/lib/Target/AArch64/AArch64SpeculationHardening.cpp
index e9699b0367d3..50300305abe3 100644
--- a/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ b/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -103,6 +103,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CodeGen.h"
@@ -146,25 +147,31 @@ private:
   BitVector RegsAlreadyMasked;
 
   bool functionUsesHardeningRegister(MachineFunction &MF) const;
-  bool instrumentControlFlow(MachineBasicBlock &MBB);
+  bool instrumentControlFlow(MachineBasicBlock &MBB,
+                             bool &UsesFullSpeculationBarrier);
   bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                AArch64CC::CondCode &CondCode) const;
   void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
                           AArch64CC::CondCode &CondCode, DebugLoc DL) const;
-  void insertSPToRegTaintPropagation(MachineBasicBlock *MBB,
+  void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI) const;
-  void insertRegToSPTaintPropagation(MachineBasicBlock *MBB,
+  void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      unsigned TmpReg) const;
+  void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    DebugLoc DL) const;
 
   bool slhLoads(MachineBasicBlock &MBB);
   bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               MachineInstr &MI, unsigned Reg);
-  bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB);
+  bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
+                                        bool UsesFullSpeculationBarrier);
   bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator MBBI);
+                                  MachineBasicBlock::iterator MBBI,
+                                  bool UsesFullSpeculationBarrier);
   bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                   DebugLoc DL);
 };
@@ -207,15 +214,19 @@ bool AArch64SpeculationHardening::endsWithCondControlFlow(
   return true;
 }
 
+void AArch64SpeculationHardening::insertFullSpeculationBarrier(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    DebugLoc DL) const {
+  // A full control flow speculation barrier consists of (DSB SYS + ISB)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf);
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf);
+}
+
 void AArch64SpeculationHardening::insertTrackingCode(
     MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
     DebugLoc DL) const {
   if (UseControlFlowSpeculationBarrier) {
-    // insert full control flow speculation barrier (DSB SYS + ISB)
-    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::ISB))
-        .addImm(0xf);
-    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::DSB))
-        .addImm(0xf);
+    insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
   } else {
     BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
         .addDef(MisspeculatingTaintReg)
@@ -227,7 +238,7 @@ void AArch64SpeculationHardening::insertTrackingCode(
 }
 
 bool AArch64SpeculationHardening::instrumentControlFlow(
-    MachineBasicBlock &MBB) {
+    MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
   LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);
 
   bool Modified = false;
@@ -263,55 +274,105 @@ bool AArch64SpeculationHardening::instrumentControlFlow(
   }
 
   // Perform correct code generation around function calls and before returns.
-  {
-    SmallVector<MachineInstr *, 4> ReturnInstructions;
-    SmallVector<MachineInstr *, 4> CallInstructions;
+  // The below variables record the return/terminator instructions and the call
+  // instructions respectively; including which register is available as a
+  // temporary register just before the recorded instructions.
+  SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
+  SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
+  // if a temporary register is not available for at least one of the
+  // instructions for which we need to transfer taint to the stack pointer, we
+  // need to insert a full speculation barrier.
+  // TmpRegisterNotAvailableEverywhere tracks that condition.
+  bool TmpRegisterNotAvailableEverywhere = false;
+
+  RegScavenger RS;
+  RS.enterBasicBlock(MBB);
+
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
+    MachineInstr &MI = *I;
+    if (!MI.isReturn() && !MI.isCall())
+      continue;
 
-    for (MachineInstr &MI : MBB) {
-      if (MI.isReturn())
-        ReturnInstructions.push_back(&MI);
-      else if (MI.isCall())
-        CallInstructions.push_back(&MI);
-    }
+    // The RegScavenger represents registers available *after* the MI
+    // instruction pointed to by RS.getCurrentPosition().
+    // We need to have a register that is available *before* the MI is executed.
+    if (I != MBB.begin())
+      RS.forward(std::prev(I));
+    // FIXME: The below just finds *a* unused register. Maybe code could be
+    // optimized more if this looks for the register that isn't used for the
+    // longest time around this place, to enable more scheduling freedom. Not
+    // sure if that would actually result in a big performance difference
+    // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
+    // already to do this - but it's unclear if that could easily be used here.
+    unsigned TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
+    LLVM_DEBUG(dbgs() << "RS finds "
+                      << ((TmpReg == 0) ? "no register " : "register ");
+               if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
+               dbgs() << "to be available at MI " << MI);
+    if (TmpReg == 0)
+      TmpRegisterNotAvailableEverywhere = true;
+    if (MI.isReturn())
+      ReturnInstructions.push_back({&MI, TmpReg});
+    else if (MI.isCall())
+      CallInstructions.push_back({&MI, TmpReg});
+  }
 
-    Modified |=
-        (ReturnInstructions.size() > 0) || (CallInstructions.size() > 0);
+  if (TmpRegisterNotAvailableEverywhere) {
+    // When a temporary register is not available everywhere in this basic
+    // basic block where a propagate-taint-to-sp operation is needed, just
+    // emit a full speculation barrier at the start of this basic block, which
+    // renders the taint/speculation tracking in this basic block unnecessary.
+    insertFullSpeculationBarrier(MBB, MBB.begin(),
+                                 (MBB.begin())->getDebugLoc());
+    UsesFullSpeculationBarrier = true;
+    Modified = true;
+  } else {
+    for (auto MI_Reg : ReturnInstructions) {
+      assert(MI_Reg.second != 0);
+      LLVM_DEBUG(
+          dbgs()
+          << " About to insert Reg to SP taint propagation with temp register "
+          << printReg(MI_Reg.second, TRI)
+          << " on instruction: " << *MI_Reg.first);
+      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
+      Modified = true;
+    }
 
-    for (MachineInstr *Return : ReturnInstructions)
-      insertRegToSPTaintPropagation(Return->getParent(), Return, AArch64::X17);
-    for (MachineInstr *Call : CallInstructions) {
+    for (auto MI_Reg : CallInstructions) {
+      assert(MI_Reg.second != 0);
+      LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
+                           "propagation with temp register "
+                        << printReg(MI_Reg.second, TRI)
+                        << " around instruction: " << *MI_Reg.first);
       // Just after the call:
-      MachineBasicBlock::iterator i = Call;
-      i++;
-      insertSPToRegTaintPropagation(Call->getParent(), i);
+      insertSPToRegTaintPropagation(
+          MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
       // Just before the call:
-      insertRegToSPTaintPropagation(Call->getParent(), Call, AArch64::X17);
+      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
+      Modified = true;
     }
   }
-
   return Modified;
 }
 
 void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
-    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) const {
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
   // If full control flow speculation barriers are used, emit a control flow
   // barrier to block potential miss-speculation in flight coming in to this
   // function.
   if (UseControlFlowSpeculationBarrier) {
-    // insert full control flow speculation barrier (DSB SYS + ISB)
-    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::DSB)).addImm(0xf);
-    BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ISB)).addImm(0xf);
+    insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
     return;
   }
 
   // CMP   SP, #0   === SUBS   xzr, SP, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
       .addDef(AArch64::XZR)
       .addUse(AArch64::SP)
       .addImm(0)
       .addImm(0); // no shift
   // CSETM x16, NE  === CSINV  x16, xzr, xzr, EQ
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
       .addDef(MisspeculatingTaintReg)
       .addUse(AArch64::XZR)
       .addUse(AArch64::XZR)
@@ -319,7 +380,7 @@ void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
 }
 
 void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
-    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     unsigned TmpReg) const {
   // If full control flow speculation barriers are used, there will not be
   // miss-speculation when returning from this function, and therefore, also
@@ -328,19 +389,19 @@ void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
     return;
 
   // mov   Xtmp, SP  === ADD  Xtmp, SP, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
       .addDef(TmpReg)
       .addUse(AArch64::SP)
       .addImm(0)
       .addImm(0); // no shift
   // and   Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
       .addDef(TmpReg, RegState::Renamable)
       .addUse(TmpReg, RegState::Kill | RegState::Renamable)
       .addUse(MisspeculatingTaintReg, RegState::Kill)
       .addImm(0);
   // mov   SP, Xtmp === ADD SP, Xtmp, #0
-  BuildMI(*MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
+  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
       .addDef(AArch64::SP)
       .addUse(TmpReg, RegState::Kill)
       .addImm(0)
@@ -484,7 +545,8 @@ bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
 /// \brief If MBBI references a pseudo instruction that should be expanded
 /// here, do the expansion and return true. Otherwise return false.
 bool AArch64SpeculationHardening::expandSpeculationSafeValue(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    bool UsesFullSpeculationBarrier) {
   MachineInstr &MI = *MBBI;
   unsigned Opcode = MI.getOpcode();
   bool Is64Bit = true;
@@ -499,7 +561,7 @@ bool AArch64SpeculationHardening::expandSpeculationSafeValue(
     // Just remove the SpeculationSafe pseudo's if control flow
     // miss-speculation isn't happening because we're already inserting barriers
     // to guarantee that.
-    if (!UseControlFlowSpeculationBarrier) {
+    if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
       unsigned DstReg = MI.getOperand(0).getReg();
       unsigned SrcReg = MI.getOperand(1).getReg();
       // Mark this register and all its aliasing registers as needing to be
@@ -537,7 +599,7 @@ bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
 }
 
 bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
-    MachineBasicBlock &MBB) {
+    MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
   bool Modified = false;
 
   RegsNeedingCSDBBeforeUse.reset();
@@ -572,15 +634,16 @@ bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
           break;
         }
 
-    if (NeedToEmitBarrier)
+    if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
       Modified |= insertCSDB(MBB, MBBI, DL);
 
-    Modified |= expandSpeculationSafeValue(MBB, MBBI);
+    Modified |=
+        expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);
 
     MBBI = NMBBI;
   }
 
-  if (RegsNeedingCSDBBeforeUse.any())
+  if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
     Modified |= insertCSDB(MBB, MBBI, DL);
 
   return Modified;
@@ -609,7 +672,7 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
       Modified |= slhLoads(MBB);
   }
 
-  // 2.a Add instrumentation code to function entry and exits.
+  // 2. Add instrumentation code to function entry and exits.
   LLVM_DEBUG(
       dbgs()
       << "***** AArch64SpeculationHardening - track control flow *****\n");
@@ -620,17 +683,15 @@ bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
     EntryBlocks.push_back(LPI.LandingPadBlock);
   for (auto Entry : EntryBlocks)
     insertSPToRegTaintPropagation(
-        Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
-
-  // 2.b Add instrumentation code to every basic block.
-  for (auto &MBB : MF)
-    Modified |= instrumentControlFlow(MBB);
+        *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));
 
-  LLVM_DEBUG(dbgs() << "***** AArch64SpeculationHardening - Lowering "
-                       "SpeculationSafeValue Pseudos *****\n");
-  // Step 3: Lower SpeculationSafeValue pseudo instructions.
-  for (auto &MBB : MF)
-    Modified |= lowerSpeculationSafeValuePseudos(MBB);
+  // 3. Add instrumentation code to every basic block.
+  for (auto &MBB : MF) {
+    bool UsesFullSpeculationBarrier = false;
+    Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
+    Modified |=
+        lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
+  }
 
   return Modified;
 }
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index d2fed6861477..f10d100bfe11 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -65,10 +65,7 @@ class MCInstrInfo;
 
 } // end namespace llvm
 
-static cl::opt<bool>
-EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
-              cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
-              cl::init(true));
+extern cl::opt<bool> EmitJalrReloc;
 
 namespace {
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 18d7dd99be34..2f2dd4e03c40 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -15,6 +15,13 @@
 
 using namespace llvm;
 
+// Note: this option is defined here to be visible from libLLVMMipsAsmParser
+//       and libLLVMMipsCodeGen
+cl::opt<bool>
+EmitJalrReloc("mips-jalr-reloc", cl::Hidden,
+              cl::desc("MIPS: Emit R_{MICRO}MIPS_JALR relocation with jalr"),
+              cl::init(true));
+
 namespace {
 static const MCPhysReg O32IntRegs[4] = {Mips::A0, Mips::A1, Mips::A2, Mips::A3};
 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index a90db2384c46..ab8a6753eadc 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -89,7 +89,10 @@ namespace MipsII {
     MO_GOT_HI16,
     MO_GOT_LO16,
     MO_CALL_HI16,
-    MO_CALL_LO16
+    MO_CALL_LO16,
+
+    /// Helper operand used to generate R_MIPS_JALR
+    MO_JALR
   };
 
   enum {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index f43a4d980f92..e3dcbaccfd08 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -614,8 +614,9 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
       llvm_unreachable("Unhandled fixup kind!");
       break;
     case MipsMCExpr::MEK_DTPREL:
-      llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-      break;
+      // MEK_DTPREL is used for marking TLS DIEExpr only
+      // and contains a regular sub-expression.
+      return getExprOpValue(MipsExpr->getSubExpr(), Fixups, STI);
     case MipsMCExpr::MEK_CALL_HI16:
       FixupKind = Mips::fixup_Mips_CALL_HI16;
       break;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 99857e083c6c..2d7312725205 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -44,8 +44,10 @@ void MipsMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
     llvm_unreachable("MEK_None and MEK_Special are invalid");
     break;
   case MEK_DTPREL:
-    llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-    break;
+    // MEK_DTPREL is used for marking TLS DIEExpr only
+    // and contains a regular sub-expression.
+    getSubExpr()->print(OS, MAI, true);
+    return;
   case MEK_CALL_HI16:
     OS << "%call_hi";
     break;
@@ -161,7 +163,9 @@ MipsMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
     case MEK_Special:
       llvm_unreachable("MEK_None and MEK_Special are invalid");
     case MEK_DTPREL:
-      llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
+      // MEK_DTPREL is used for marking TLS DIEExpr only
+      // and contains a regular sub-expression.
+      return getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup);
     case MEK_DTPREL_HI:
     case MEK_DTPREL_LO:
     case MEK_GOT:
@@ -249,9 +253,6 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
   case MEK_Special:
     llvm_unreachable("MEK_None and MEK_Special are invalid");
     break;
-  case MEK_DTPREL:
-    llvm_unreachable("MEK_DTPREL is used for TLS DIEExpr only");
-    break;
   case MEK_CALL_HI16:
   case MEK_CALL_LO16:
   case MEK_GOT:
@@ -274,6 +275,7 @@ void MipsMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
     if (const MipsMCExpr *E = dyn_cast<const MipsMCExpr>(getSubExpr()))
       E->fixELFSymbolsInTLSFixups(Asm);
     break;
+  case MEK_DTPREL:
   case MEK_DTPREL_HI:
   case MEK_DTPREL_LO:
   case MEK_TLSLDM:
diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 814918d25e70..c441aa76ad40 100644
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -460,6 +460,7 @@ class JALRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
   let isCall = 1;
   let hasDelaySlot = 0;
   let Defs = [RA];
+  let hasPostISelHook = 1;
 }
 class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>;
 
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index af380a0ec71e..ccc4f04bb92d 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -426,6 +426,7 @@ class JumpLinkRegMM16<string opstr, RegisterOperand RO> :
   let isCall = 1;
   let hasDelaySlot = 1;
   let Defs = [RA];
+  let hasPostISelHook = 1;
 }
 
 // 16-bit Jump Reg
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index 2bd0cf2d59a6..fb239f572ef2 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -1105,7 +1105,7 @@ def : MipsPat<(select i32:$cond, immz, i32:$f),
 
 // Pseudo instructions
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
-    hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in {
+    hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT], hasPostISelHook = 1 in {
   class TailCallRegR6<Instruction JumpInst, Register RT, RegisterOperand RO> :
     PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
     PseudoInstExpansion<(JumpInst RT:$rt, RO:$rs)>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 362431fd42a6..a7a748b0840e 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -68,6 +68,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "mips-asm-printer"
 
+extern cl::opt<bool> EmitJalrReloc;
+
 MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() const {
   return static_cast<MipsTargetStreamer &>(*OutStreamer->getTargetStreamer());
 }
@@ -148,6 +150,40 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
   EmitToStreamer(OutStreamer, TmpInst0);
 }
 
+// If there is an MO_JALR operand, insert:
+//
+// .reloc tmplabel, R_{MICRO}MIPS_JALR, symbol
+// tmplabel:
+//
+// This is an optimization hint for the linker which may then replace
+// an indirect call with a direct branch.
+static void emitDirectiveRelocJalr(const MachineInstr &MI,
+                                   MCContext &OutContext,
+                                   TargetMachine &TM,
+                                   MCStreamer &OutStreamer,
+                                   const MipsSubtarget &Subtarget) {
+  for (unsigned int I = MI.getDesc().getNumOperands(), E = MI.getNumOperands();
+       I < E; ++I) {
+    MachineOperand MO = MI.getOperand(I);
+    if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR)) {
+      MCSymbol *Callee = MO.getMCSymbol();
+      if (Callee && !Callee->getName().empty()) {
+        MCSymbol *OffsetLabel = OutContext.createTempSymbol();
+        const MCExpr *OffsetExpr =
+            MCSymbolRefExpr::create(OffsetLabel, OutContext);
+        const MCExpr *CaleeExpr =
+            MCSymbolRefExpr::create(Callee, OutContext);
+        OutStreamer.EmitRelocDirective
+            (*OffsetExpr,
+             Subtarget.inMicroMipsMode() ? "R_MICROMIPS_JALR" : "R_MIPS_JALR",
+             CaleeExpr, SMLoc(), *TM.getMCSubtargetInfo());
+        OutStreamer.EmitLabel(OffsetLabel);
+        return;
+      }
+    }
+  }
+}
+
 void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   MipsTargetStreamer &TS = getTargetStreamer();
   unsigned Opc = MI->getOpcode();
@@ -207,6 +243,11 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
 
+  if (EmitJalrReloc &&
+      (MI->isReturn() || MI->isCall() || MI->isIndirectBranch())) {
+    emitDirectiveRelocJalr(*MI, OutContext, TM, *OutStreamer, *Subtarget);
+  }
+
   MachineBasicBlock::const_instr_iterator I = MI->getIterator();
   MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
 
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 22ade31a72cd..a18416b9e861 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -56,6 +56,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSymbol.h"
@@ -75,6 +76,8 @@
 
 using namespace llvm;
 
+extern cl::opt<bool> EmitJalrReloc;
+
 namespace {
 
 class MipsFastISel final : public FastISel {
@@ -1551,6 +1554,16 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
 
   CLI.Call = MIB;
 
+  if (EmitJalrReloc && !Subtarget->inMips16Mode()) {
+    // Attach callee address to the instruction, let asm printer emit
+    // .reloc R_MIPS_JALR.
+    if (Symbol)
+      MIB.addSym(Symbol, MipsII::MO_JALR);
+    else
+      MIB.addSym(FuncInfo.MF->getContext().getOrCreateSymbol(
+	                   Addr.getGlobalValue()->getName()), MipsII::MO_JALR);
+  }
+
   // Finish off the call including any return values.
   return finishCall(CLI, RetVT, NumBytes);
 }
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 8c2a364cdfa9..0f9c075ba0cc 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -57,6 +57,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CodeGen.h"
@@ -91,6 +92,8 @@ NoZeroDivCheck("mno-check-zero-division", cl::Hidden,
                cl::desc("MIPS: Don't trap on integer division by zero."),
                cl::init(false));
 
+extern cl::opt<bool> EmitJalrReloc;
+
 static const MCPhysReg Mips64DPRegs[8] = {
   Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
   Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
@@ -2879,6 +2882,54 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
     Ops.push_back(InFlag);
 }
 
+void MipsTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                                       SDNode *Node) const {
+  switch (MI.getOpcode()) {
+    default:
+      return;
+    case Mips::JALR:
+    case Mips::JALRPseudo:
+    case Mips::JALR64:
+    case Mips::JALR64Pseudo:
+    case Mips::JALR16_MM:
+    case Mips::JALRC16_MMR6:
+    case Mips::TAILCALLREG:
+    case Mips::TAILCALLREG64:
+    case Mips::TAILCALLR6REG:
+    case Mips::TAILCALL64R6REG:
+    case Mips::TAILCALLREG_MM:
+    case Mips::TAILCALLREG_MMR6: {
+      if (!EmitJalrReloc ||
+          Subtarget.inMips16Mode() ||
+          !isPositionIndependent() ||
+          Node->getNumOperands() < 1 ||
+          Node->getOperand(0).getNumOperands() < 2) {
+        return;
+      }
+      // We are after the callee address, set by LowerCall().
+      // If added to MI, asm printer will emit .reloc R_MIPS_JALR for the
+      // symbol.
+      const SDValue TargetAddr = Node->getOperand(0).getOperand(1);
+      StringRef Sym;
+      if (const GlobalAddressSDNode *G =
+              dyn_cast_or_null<const GlobalAddressSDNode>(TargetAddr)) {
+        Sym = G->getGlobal()->getName();
+      }
+      else if (const ExternalSymbolSDNode *ES =
+                   dyn_cast_or_null<const ExternalSymbolSDNode>(TargetAddr)) {
+        Sym = ES->getSymbol();
+      }
+
+      if (Sym.empty())
+        return;
+
+      MachineFunction *MF = MI.getParent()->getParent();
+      MCSymbol *S = MF->getContext().getOrCreateSymbol(Sym);
+      MI.addOperand(MachineOperand::CreateMCSymbol(S, MipsII::MO_JALR));
+    }
+  }
+}
+
 /// LowerCall - functions arguments are copied from virtual regs to
 /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
 SDValue
@@ -2930,7 +2981,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // the maximum out going argument area (including the reserved area), and
   // preallocates the stack space on entrance to the caller.
   //
-  // FIXME: We should do the same for efficency and space.
+  // FIXME: We should do the same for efficiency and space.
 
   // Note: The check on the calling convention below must match
   //       MipsABIInfo::GetCalleeAllocdArgSizeInBytes().
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index e043f133a09f..c88633be02b7 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -341,6 +341,9 @@ class TargetRegisterClass;
     EmitInstrWithCustomInserter(MachineInstr &MI,
                                 MachineBasicBlock *MBB) const override;
 
+    void AdjustInstrPostInstrSelection(MachineInstr &MI,
+                                       SDNode *Node) const override;
+
     void HandleByVal(CCState *, unsigned &, unsigned) const override;
 
     unsigned getRegisterByName(const char* RegName, EVT VT,
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index bfb4c775205d..e38bef4663a7 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -653,6 +653,16 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
 
     MIB.addImm(0);
 
+    // If I has an MCSymbol operand (used by asm printer, to emit R_MIPS_JALR),
+    // add it to the new instruction.
+    for (unsigned J = I->getDesc().getNumOperands(), E = I->getNumOperands();
+         J < E; ++J) {
+      const MachineOperand &MO = I->getOperand(J);
+      if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR))
+        MIB.addSym(MO.getMCSymbol(), MipsII::MO_JALR);
+    }
+
+
   } else {
     for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
       if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
@@ -825,7 +835,8 @@ MipsInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
     {MO_GOT_HI16,     "mips-got-hi16"},
     {MO_GOT_LO16,     "mips-got-lo16"},
     {MO_CALL_HI16,    "mips-call-hi16"},
-    {MO_CALL_LO16,    "mips-call-lo16"}
+    {MO_CALL_LO16,    "mips-call-lo16"},
+    {MO_JALR,         "mips-jalr"}
   };
   return makeArrayRef(Flags);
 }
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index d9398b7d6024..46721e6cb9e5 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1623,11 +1623,15 @@ let isCall=1, hasDelaySlot=1, isCTI=1, Defs = [RA] in {
   class JumpLinkRegPseudo<RegisterOperand RO, Instruction JALRInst,
                           Register RetReg, RegisterOperand ResRO = RO>:
     PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], II_JALR>,
-    PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)>;
+    PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)> {
+    let hasPostISelHook = 1;
+  }
 
   class JumpLinkReg<string opstr, RegisterOperand RO>:
     InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
-           [], II_JALR, FrmR, opstr>;
+           [], II_JALR, FrmR, opstr> {
+    let hasPostISelHook = 1;
+  }
 
   class BGEZAL_FT<string opstr, DAGOperand opnd,
                   RegisterOperand RO> :
@@ -1646,7 +1650,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
 
   class TailCallReg<Instruction JumpInst, RegisterOperand RO> :
     PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
-    PseudoInstExpansion<(JumpInst RO:$rs)>;
+    PseudoInstExpansion<(JumpInst RO:$rs)> {
+    let hasPostISelHook = 1;
+  }
 }
 
 class BAL_BR_Pseudo<Instruction RealInst, DAGOperand opnd> :
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 46b37ceae391..4a7c0ce2be19 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -117,6 +117,8 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
   case MipsII::MO_CALL_LO16:
     TargetKind = MipsMCExpr::MEK_CALL_LO16;
     break;
+  case MipsII::MO_JALR:
+    return MCOperand();
   }
 
   switch (MOTy) {
diff --git a/lib/Target/X86/X86DiscriminateMemOps.cpp b/lib/Target/X86/X86DiscriminateMemOps.cpp
index 3654bf04f4e9..6bee20b617dd 100644
--- a/lib/Target/X86/X86DiscriminateMemOps.cpp
+++ b/lib/Target/X86/X86DiscriminateMemOps.cpp
@@ -27,6 +27,14 @@ using namespace llvm;
 
 #define DEBUG_TYPE "x86-discriminate-memops"
 
+static cl::opt<bool> EnableDiscriminateMemops(
+    DEBUG_TYPE, cl::init(false),
+    cl::desc("Generate unique debug info for each instruction with a memory "
+             "operand. Should be enabled for profile-drived cache prefetching, "
+             "both in the build of the binary being profiled, as well as in "
+             "the build of the binary consuming the profile."),
+    cl::Hidden);
+
 namespace {
 
 using Location = std::pair<StringRef, unsigned>;
@@ -67,6 +75,9 @@ char X86DiscriminateMemOps::ID = 0;
 X86DiscriminateMemOps::X86DiscriminateMemOps() : MachineFunctionPass(ID) {}
 
 bool X86DiscriminateMemOps::runOnMachineFunction(MachineFunction &MF) {
+  if (!EnableDiscriminateMemops)
+    return false;
+
   DISubprogram *FDI = MF.getFunction().getSubprogram();
   if (!FDI || !FDI->getUnit()->getDebugInfoForProfiling())
     return false;
diff --git a/lib/Target/X86/X86InsertPrefetch.cpp b/lib/Target/X86/X86InsertPrefetch.cpp
index 30b46a09ef0f..8bd57aa2278b 100644
--- a/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/lib/Target/X86/X86InsertPrefetch.cpp
@@ -34,7 +34,8 @@ using namespace sampleprof;
 
 static cl::opt<std::string>
     PrefetchHintsFile("prefetch-hints-file",
-                      cl::desc("Path to the prefetch hints profile."),
+                      cl::desc("Path to the prefetch hints profile. See also "
+                               "-x86-discriminate-memops"),
                       cl::Hidden);
 namespace {
 
diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp
index a9772e31da50..81d63ee80394 100644
--- a/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -249,6 +249,8 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
   bool DoPromote = false;
   if (GV.hasLocalLinkage() &&
       ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
+    // Save the original name string before we rename GV below.
+    auto Name = GV.getName().str();
     // Once we change the name or linkage it is difficult to determine
     // again whether we should promote since shouldPromoteLocalToGlobal needs
     // to locate the summary (based on GUID from name and linkage). Therefore,
@@ -257,6 +259,12 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
     GV.setLinkage(getLinkage(&GV, DoPromote));
     if (!GV.hasLocalLinkage())
       GV.setVisibility(GlobalValue::HiddenVisibility);
+
+    // If we are renaming a COMDAT leader, ensure that we record the COMDAT
+    // for later renaming as well. This is required for COFF.
+    if (const auto *C = GV.getComdat())
+      if (C->getName() == Name)
+        RenamedComdats.try_emplace(C, M.getOrInsertComdat(GV.getName()));
   } else
     GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
 
@@ -281,6 +289,16 @@ void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
     processGlobalForThinLTO(SF);
   for (GlobalAlias &GA : M.aliases())
     processGlobalForThinLTO(GA);
+
+  // Replace any COMDATS that required renaming (because the COMDAT leader was
+  // promoted and renamed).
+  if (!RenamedComdats.empty())
+    for (auto &GO : M.global_objects())
+      if (auto *C = GO.getComdat()) {
+        auto Replacement = RenamedComdats.find(C);
+        if (Replacement != RenamedComdats.end())
+          GO.setComdat(Replacement->second);
+      }
 }
 
 bool FunctionImportGlobalProcessing::run() {
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index a93d1aeb62ef..112e80d27e34 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -217,7 +217,10 @@ static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
     // When the value is absent it is interpreted as 'attribute set'.
     return true;
   case 2:
-    return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get());
+    if (ConstantInt *IntMD =
+            mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get()))
+      return IntMD->getZExtValue();
+    return true;
   }
   llvm_unreachable("unexpected number of options");
 }
@@ -376,17 +379,17 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
   Optional<int> InterleaveCount =
       getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
 
-  if (Enable == true) {
-    // 'Forcing' vector width and interleave count to one effectively disables
-    // this tranformation.
-    if (VectorizeWidth == 1 && InterleaveCount == 1)
-      return TM_SuppressedByUser;
-    return TM_ForcedByUser;
-  }
+  // 'Forcing' vector width and interleave count to one effectively disables
+  // this tranformation.
+  if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
+    return TM_SuppressedByUser;
 
   if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
     return TM_Disable;
 
+  if (Enable == true)
+    return TM_ForcedByUser;
+
   if (VectorizeWidth == 1 && InterleaveCount == 1)
     return TM_Disable;
 
diff --git a/test/CodeGen/AArch64/build-vector-extract.ll b/test/CodeGen/AArch64/build-vector-extract.ll
new file mode 100644
index 000000000000..a785533e8db9
--- /dev/null
+++ b/test/CodeGen/AArch64/build-vector-extract.ll
@@ -0,0 +1,441 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+define <2 x i64> @extract0_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    zip1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zip1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[2]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert0_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[3]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    zip1 v1.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract0_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 0
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 1
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov v0.s[3], wzr
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[2]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 2
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
+; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[3]
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <4 x i32> %x, i32 3
+  %z = zext i32 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert0_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[0], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 0
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[0]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 0
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 1
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 2
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert1_i64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    dup v0.2d, x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> undef, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
+; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umov w8, v0.h[3]
+; CHECK-NEXT:    and x8, x8, #0xffff
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    mov v0.d[1], x8
+; CHECK-NEXT:    ret
+  %e = extractelement <8 x i16> %x, i32 3
+  %z = zext i16 %e to i64
+  %r = insertelement <2 x i64> zeroinitializer, i64 %z, i32 1
+  ret <2 x i64> %r
+}
+
+; This would crash because we did not expect to create
+; a shuffle for a vector where the source operand is
+; not the same size as the result.
+; TODO: Should we handle this pattern? Ie, is moving to/from
+; registers the optimal code?
+
+define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) {
+; CHECK-LABEL: larger_bv_than_source:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %t1 = extractelement <4 x i16> %t0, i32 2
+  %vgetq_lane = zext i16 %t1 to i32
+  %t2 = insertelement <4 x i32> undef, i32 %vgetq_lane, i64 0
+  ret <4 x i32> %t2
+}
+
diff --git a/test/CodeGen/AArch64/eh_recoverfp.ll b/test/CodeGen/AArch64/eh_recoverfp.ll
new file mode 100644
index 000000000000..777bcee54382
--- /dev/null
+++ b/test/CodeGen/AArch64/eh_recoverfp.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple arm64-windows %s -o - 2>&1 | FileCheck %s
+
+define i8* @foo(i8* %a) {
+; CHECK-LABEL: foo
+; CHECK-NOT: llvm.x86.seh.recoverfp
+  %1 = call i8* @llvm.x86.seh.recoverfp(i8* bitcast (i32 ()* @f to i8*), i8* %a)
+  ret i8* %1
+}
+
+declare i8* @llvm.x86.seh.recoverfp(i8*, i8*)
+declare i32 @f()
diff --git a/test/CodeGen/AArch64/speculation-hardening-loads.ll b/test/CodeGen/AArch64/speculation-hardening-loads.ll
index 0b8f8d31b316..e90fb19a522a 100644
--- a/test/CodeGen/AArch64/speculation-hardening-loads.ll
+++ b/test/CodeGen/AArch64/speculation-hardening-loads.ll
@@ -11,10 +11,10 @@ entry:
 ; CHECK-NEXT: and   x8, x8, x16
 ; CHECK-NEXT: and   x1, x1, x16
 ; CHECK-NEXT: csdb
-; CHECK-NEXT: mov x17, sp
-; CHECK-NEXT: and x17, x17, x16
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
 ; CHECK-NEXT: mov x0, x8
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }
 
@@ -29,9 +29,9 @@ entry:
 ; CHECK-NEXT: and   x0, x0, x16
 ; CHECK-NEXT: csdb
 ; CHECK-NEXT: ldr   d0, [x0]
-; CHECK-NEXT: mov x17, sp
-; CHECK-NEXT: and x17, x17, x16
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }
 
@@ -51,12 +51,12 @@ entry:
 ; CHECK-NEXT: and x8, x8, x16
 ; csdb instruction must occur before the add instruction with w8 as operand.
 ; CHECK-NEXT: csdb
-; CHECK-NEXT: mov x17, sp
 ; CHECK-NEXT: add w9, w1, w8
 ; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: and x17, x17, x16
 ; CHECK-NEXT: csel w0, w1, w9, eq
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }
 
@@ -76,12 +76,12 @@ entry:
 ; CHECK-NEXT: and w8, w8, w16
 ; csdb instruction must occur before the add instruction with x8 as operand.
 ; CHECK-NEXT: csdb
-; CHECK-NEXT: mov x17, sp
 ; CHECK-NEXT: add x9, x1, x8
 ; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: and x17, x17, x16
 ; CHECK-NEXT: csel x0, x1, x9, eq
-; CHECK-NEXT: mov sp, x17
+; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT: mov sp, [[TMPREG]]
 ; CHECK-NEXT: ret
 }
 
@@ -112,11 +112,11 @@ entry:
 ; CHECK-NEXT:  and     x1, x1, x16
 ; CHECK-NEXT:  csdb
 ; CHECK-NEXT:  ldr     d0, [x1]
-; CHECK-NEXT:  mov     x17, sp
-; CHECK-NEXT:  and     x17, x17, x16
 ; CHECK-NEXT:  mov     v0.d[1], v0.d[0]
 ; CHECK-NEXT:  str     q0, [x0]
-; CHECK-NEXT:  mov     sp, x17
+; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT:  mov     sp, [[TMPREG]]
 ; CHECK-NEXT:  ret
 }
 
@@ -129,9 +129,9 @@ entry:
 ; CHECK-NEXT:  and     x1, x1, x16
 ; CHECK-NEXT:  csdb
 ; CHECK-NEXT:  ld1     { v0.d }[0], [x1]
-; CHECK-NEXT:  mov     x17, sp
-; CHECK-NEXT:  and     x17, x17, x16
-; CHECK-NEXT:  mov     sp, x17
+; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT:  mov     sp, [[TMPREG]]
 ; CHECK-NEXT:  ret
   %0 = load double, double* %b, align 16
   %vld1_lane = insertelement <2 x double> <double undef, double 0.000000e+00>, double %0, i32 0
@@ -147,9 +147,9 @@ entry:
 ; CHECK-NEXT:  .cfi_def_cfa_offset 16
 ; CHECK-NEXT:  ldr     w8, [sp, #12]
 ; CHECK-NEXT:  add     sp, sp, #16
-; CHECK-NEXT:  mov     x17, sp
-; CHECK-NEXT:  and     x17, x17, x16
-; CHECK-NEXT:  mov     sp, x17
+; CHECK-NEXT:  mov     [[TMPREG:x[0-9]+]], sp
+; CHECK-NEXT:  and     [[TMPREG]], [[TMPREG]], x16
+; CHECK-NEXT:  mov     sp, [[TMPREG]]
 ; CHECK-NEXT:  ret
   %a = alloca i32, align 4
   %val = load volatile i32, i32* %a, align 4
diff --git a/test/CodeGen/AArch64/speculation-hardening.ll b/test/CodeGen/AArch64/speculation-hardening.ll
index 3535b63c32cc..51156f68dec8 100644
--- a/test/CodeGen/AArch64/speculation-hardening.ll
+++ b/test/CodeGen/AArch64/speculation-hardening.ll
@@ -1,9 +1,9 @@
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
-; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH --dump-input-on-failure
-; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,SLH,GISELSLH --dump-input-on-failure
+; RUN sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,GISELNOSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR/speculative_load_hardening/' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,SLH,NOGISELSLH --dump-input-on-failure
+; RUN: sed -e 's/SLHATTR//' %s | llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -fast-isel | FileCheck %s --check-prefixes=CHECK,NOSLH,NOGISELNOSLH --dump-input-on-failure
 
 define i32 @f(i8* nocapture readonly %p, i32 %i, i32 %N) local_unnamed_addr SLHATTR {
 ; CHECK-LABEL: f
@@ -13,12 +13,12 @@ entry:
 ; NOSLH-NOT:  cmp sp, #0
 ; NOSLH-NOT:  csetm x16, ne
 
-; SLH:  mov x17, sp
-; SLH:  and x17, x17, x16
-; SLH:  mov sp, x17
-; NOSLH-NOT:  mov x17, sp
-; NOSLH-NOT:  and x17, x17, x16
-; NOSLH-NOT:  mov sp, x17
+; SLH:  mov [[TMPREG:x[0-9]+]], sp
+; SLH:  and [[TMPREG]], [[TMPREG]], x16
+; SLH:  mov sp, [[TMPREG]]
+; NOSLH-NOT:  mov [[TMPREG:x[0-9]+]], sp
+; NOSLH-NOT:  and [[TMPREG]], [[TMPREG]], x16
+; NOSLH-NOT:  mov sp, [[TMPREG]]
   %call = tail call i32 @tail_callee(i32 %i)
 ; SLH:  cmp sp, #0
 ; SLH:  csetm x16, ne
@@ -43,29 +43,37 @@ if.then:                                          ; preds = %entry
 ; NOSLH-NOT: csel x16, x16, xzr, [[COND]]
 return:                                           ; preds = %entry, %if.then
   %retval.0 = phi i32 [ %conv, %if.then ], [ 0, %entry ]
-; SLH:  mov x17, sp
-; SLH:  and x17, x17, x16
-; SLH:  mov sp, x17
-; NOSLH-NOT:  mov x17, sp
-; NOSLH-NOT:  and x17, x17, x16
-; NOSLH-NOT:  mov sp, x17
+; SLH:  mov [[TMPREG:x[0-9]+]], sp
+; SLH:  and [[TMPREG]], [[TMPREG]], x16
+; SLH:  mov sp, [[TMPREG]]
+; NOSLH-NOT:  mov [[TMPREG:x[0-9]+]], sp
+; NOSLH-NOT:  and [[TMPREG]], [[TMPREG]], x16
+; NOSLH-NOT:  mov sp, [[TMPREG]]
   ret i32 %retval.0
 }
 
 ; Make sure that for a tail call, taint doesn't get put into SP twice.
 define i32 @tail_caller(i32 %a) local_unnamed_addr SLHATTR {
 ; CHECK-LABEL: tail_caller:
-; SLH:     mov     x17, sp
-; SLH:     and     x17, x17, x16
-; SLH:     mov     sp, x17
-; NOSLH-NOT:     mov     x17, sp
-; NOSLH-NOT:     and     x17, x17, x16
-; NOSLH-NOT:     mov     sp, x17
+; NOGISELSLH:     mov [[TMPREG:x[0-9]+]], sp
+; NOGISELSLH:     and [[TMPREG]], [[TMPREG]], x16
+; NOGISELSLH:     mov sp, [[TMPREG]]
+; NOGISELNOSLH-NOT:     mov [[TMPREG:x[0-9]+]], sp
+; NOGISELNOSLH-NOT:     and [[TMPREG]], [[TMPREG]], x16
+; NOGISELNOSLH-NOT:     mov sp, [[TMPREG]]
+; GISELSLH:     mov [[TMPREG:x[0-9]+]], sp
+; GISELSLH:     and [[TMPREG]], [[TMPREG]], x16
+; GISELSLH:     mov sp, [[TMPREG]]
+; GISELNOSLH-NOT:     mov [[TMPREG:x[0-9]+]], sp
+; GISELNOSLH-NOT:     and [[TMPREG]], [[TMPREG]], x16
+; GISELNOSLH-NOT:     mov sp, [[TMPREG]]
 ;  GlobalISel doesn't optimize tail calls (yet?), so only check that
 ;  cross-call taint register setup code is missing if a tail call was
 ;  actually produced.
-; SLH:     {{(bl tail_callee[[:space:]] cmp sp, #0)|(b tail_callee)}}
-; SLH-NOT: cmp sp, #0
+; NOGISELSLH:     b tail_callee
+; GISELSLH:       bl tail_callee
+; GISELSLH:       cmp sp, #0
+; SLH-NOT:        cmp sp, #0
   %call = tail call i32 @tail_callee(i32 %a)
   ret i32 %call
 }
diff --git a/test/CodeGen/AArch64/speculation-hardening.mir b/test/CodeGen/AArch64/speculation-hardening.mir
index cf8357d9558b..5991c4df0407 100644
--- a/test/CodeGen/AArch64/speculation-hardening.mir
+++ b/test/CodeGen/AArch64/speculation-hardening.mir
@@ -25,6 +25,22 @@
   define void @indirectbranch(i32 %a, i32 %b) speculative_load_hardening {
    ret void
   }
+  ; Also check that a non-default temporary register gets picked correctly to
+  ; transfer the SP to to and it with the taint register when the default
+  ; temporary isn't available.
+  define void @indirect_call_x17(i32 %a, i32 %b) speculative_load_hardening {
+   ret void
+  }
+  @g = common dso_local local_unnamed_addr global i64 (...)* null, align 8
+  define void @indirect_tailcall_x17(i32 %a, i32 %b) speculative_load_hardening {
+   ret void
+  }
+  define void @indirect_call_lr(i32 %a, i32 %b) speculative_load_hardening {
+   ret void
+  }
+  define void @RS_cannot_find_available_regs() speculative_load_hardening {
+   ret void
+  }
 ...
 ---
 name:            nobranch_fallthrough
@@ -115,3 +131,72 @@ body:             |
   ; CHECK-NOT: csel
    RET undef $lr, implicit $x0
 ...
+---
+name:            indirect_call_x17
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x17
+    ; CHECK-LABEL: indirect_call_x17
+    ; CHECK:       mov x0, sp
+    ; CHECK:       and x0, x0, x16
+    ; CHECK:       mov sp, x0
+    ; CHECK:       blr x17
+    BLR killed renamable $x17, implicit-def dead $lr, implicit $sp
+    RET undef $lr, implicit undef $w0
+...
+---
+name:           indirect_tailcall_x17
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: indirect_tailcall_x17
+    ; CHECK:       mov x1, sp
+    ; CHECK:       and x1, x1, x16
+    ; CHECK:       mov sp, x1
+    ; CHECK:       br x17
+    $x8 = ADRP target-flags(aarch64-page) @g
+    $x17 = LDRXui killed $x8, target-flags(aarch64-pageoff, aarch64-nc) @g
+    TCRETURNri killed $x17, 0, implicit $sp, implicit $x0
+...
+---
+name:           indirect_call_lr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: indirect_call_lr
+    ; CHECK:            mov x1, sp
+    ; CHECK-NEXT:       and x1, x1, x16
+    ; CHECK-NEXT:       mov sp, x1
+    ; CHECK-NEXT:       blr x30
+    liveins: $x0, $lr
+    BLR killed renamable $lr, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
+    $w0 = nsw ADDWri killed $w0, 1, 0
+    RET undef $lr, implicit $w0
+...
+---
+name:           RS_cannot_find_available_regs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; In the rare case when no free temporary register is available for the
+    ; propagate taint-to-sp operation, just put in a full speculation barrier
+    ; (isb+dsb sy) at the start of the basic block. And don't put masks on
+    ; instructions for the rest of the basic block, since speculation in that
+    ; basic block was already done, so no need to do masking.
+    ; CHECK-LABEL: RS_cannot_find_available_regs
+    ; CHECK:       dsb sy
+    ; CHECK-NEXT:  isb
+    ; CHECK-NEXT:  ldr x0, [x0]
+    ; The following 2 instructions come from propagating the taint encoded in
+    ; sp at function entry to x16. It turns out the taint info in x16 is not
+    ; used in this function, so those instructions could be optimized away. An
+    ; optimization for later if it turns out this situation occurs often enough.
+    ; CHECK-NEXT:  cmp sp, #0
+    ; CHECK-NEXT:  csetm x16, ne
+    ; CHECK-NEXT:  ret
+    liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $fp, $lr
+     $x0 = LDRXui killed $x0, 0
+     RET undef $lr, implicit $x0
+...
diff --git a/test/CodeGen/Mips/cconv/vector.ll b/test/CodeGen/Mips/cconv/vector.ll
index 6a07c4f34564..5c7c3f424c38 100644
--- a/test/CodeGen/Mips/cconv/vector.ll
+++ b/test/CodeGen/Mips/cconv/vector.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB
-; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
+; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
 ; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB
-; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EB
+; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EB
 ; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL
-; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
 ; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL
-; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EL
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler -mips-jalr-reloc=false | FileCheck %s --check-prefixes=ALL,MIPS64R5,MIPS64R5EL
 
 ; Test that vector types are passed through the integer register set whether or
 ; not MSA is enabled. This is a ABI requirement for MIPS. For GCC compatibility
diff --git a/test/CodeGen/Mips/gprestore.ll b/test/CodeGen/Mips/gprestore.ll
index 88ac047b6609..a1e696b0ac08 100644
--- a/test/CodeGen/Mips/gprestore.ll
+++ b/test/CodeGen/Mips/gprestore.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic | FileCheck %s --check-prefix=O32
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic | FileCheck %s --check-prefix=N64
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 | FileCheck %s --check-prefix=N32
-; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -O3 | FileCheck %s --check-prefix=O3O32
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -O3 | FileCheck %s --check-prefix=O3N64
-; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -O3 | FileCheck %s --check-prefix=O3N32
+; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -mips-jalr-reloc=false | FileCheck %s --check-prefix=O32
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -mips-jalr-reloc=false | FileCheck %s --check-prefix=N64
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -mips-jalr-reloc=false | FileCheck %s --check-prefix=N32
+; RUN: llc -mtriple=mips-mti-linux-gnu < %s -relocation-model=pic -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3O32
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3N64
+; RUN: llc -mtriple=mips64-mti-linux-gnu < %s -relocation-model=pic -target-abi n32 -O3 -mips-jalr-reloc=false | FileCheck %s --check-prefix=O3N32
 
 ; Test that PIC calls use the $25 register. This is an ABI requirement.
 
diff --git a/test/CodeGen/Mips/llvm-ir/sdiv.ll b/test/CodeGen/Mips/llvm-ir/sdiv.ll
index e54eaa63222a..af3d4f50f3fe 100644
--- a/test/CodeGen/Mips/llvm-ir/sdiv.ll
+++ b/test/CodeGen/Mips/llvm-ir/sdiv.ll
@@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6
 
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6
 
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6
 
 define signext i1 @sdiv_i1(i1 signext %a, i1 signext %b) {
 ; GP32-LABEL: sdiv_i1:
diff --git a/test/CodeGen/Mips/llvm-ir/srem.ll b/test/CodeGen/Mips/llvm-ir/srem.ll
index ef0502c85d59..487a5b9b6cbc 100644
--- a/test/CodeGen/Mips/llvm-ir/srem.ll
+++ b/test/CodeGen/Mips/llvm-ir/srem.ll
@@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6
 
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6
 
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6
 
 define signext i1 @srem_i1(i1 signext %a, i1 signext %b) {
 ; GP32-LABEL: srem_i1:
diff --git a/test/CodeGen/Mips/llvm-ir/udiv.ll b/test/CodeGen/Mips/llvm-ir/udiv.ll
index 8694a9f92b65..3b7243712024 100644
--- a/test/CodeGen/Mips/llvm-ir/udiv.ll
+++ b/test/CodeGen/Mips/llvm-ir/udiv.ll
@@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R1
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R1
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6
 
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R2
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6
 
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6
 
 define zeroext i1 @udiv_i1(i1 zeroext %a, i1 zeroext %b) {
 ; GP32-LABEL: udiv_i1:
diff --git a/test/CodeGen/Mips/llvm-ir/urem.ll b/test/CodeGen/Mips/llvm-ir/urem.ll
index b744f706cbf9..4105d67da6f1 100644
--- a/test/CodeGen/Mips/llvm-ir/urem.ll
+++ b/test/CodeGen/Mips/llvm-ir/urem.ll
@@ -1,36 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=mips -mcpu=mips2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R0R2
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R0R2
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP32,GP32R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP32,GP32R2R5
 ; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP32R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP32R6
 
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips4 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R0R1
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R0R1
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r2 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r3 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r5 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefixes=GP64,GP64R2R5
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefixes=GP64,GP64R2R5
 ; RUN: llc < %s -mtriple=mips64 -mcpu=mips64r6 -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=GP64R6
+; RUN:   -mips-jalr-reloc=false | FileCheck %s -check-prefix=GP64R6
 
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR3
-; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips -relocation-model=pic \
-; RUN:   | FileCheck %s -check-prefix=MMR6
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r3 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR3
+; RUN: llc < %s -mtriple=mips -mcpu=mips32r6 -mattr=+micromips \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false | \
+; RUN:   FileCheck %s -check-prefix=MMR6
 
 define signext i1 @urem_i1(i1 signext %a, i1 signext %b) {
 ; GP32-LABEL: urem_i1:
diff --git a/test/CodeGen/Mips/long-call-attr.ll b/test/CodeGen/Mips/long-call-attr.ll
index 5b6ba94aaa35..beda290a9725 100644
--- a/test/CodeGen/Mips/long-call-attr.ll
+++ b/test/CodeGen/Mips/long-call-attr.ll
@@ -1,11 +1,11 @@
 ; RUN: llc -march=mips -target-abi o32 --mattr=+long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=O32 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=O32 %s
 ; RUN: llc -march=mips -target-abi o32 --mattr=-long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=O32 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=O32 %s
 ; RUN: llc -march=mips64 -target-abi n64 --mattr=+long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=N64 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=N64 %s
 ; RUN: llc -march=mips64 -target-abi n64 --mattr=-long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefix=N64 %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefix=N64 %s
 
 declare void @far() #0
 
diff --git a/test/CodeGen/Mips/long-call-mcount.ll b/test/CodeGen/Mips/long-call-mcount.ll
index 70a4410d060b..580f452526f7 100644
--- a/test/CodeGen/Mips/long-call-mcount.ll
+++ b/test/CodeGen/Mips/long-call-mcount.ll
@@ -1,8 +1,8 @@
 ; Check call to mcount in case of long/short call options.
 ; RUN: llc -march=mips -target-abi o32 --mattr=+long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefixes=CHECK,LONG %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefixes=CHECK,LONG %s
 ; RUN: llc -march=mips -target-abi o32 --mattr=-long-calls,+noabicalls < %s \
-; RUN:   | FileCheck -check-prefixes=CHECK,SHORT %s
+; RUN:   -mips-jalr-reloc=false | FileCheck -check-prefixes=CHECK,SHORT %s
 
 ; Function Attrs: noinline nounwind optnone
 define void @foo() #0 {
diff --git a/test/CodeGen/Mips/msa/f16-llvm-ir.ll b/test/CodeGen/Mips/msa/f16-llvm-ir.ll
index 9105e9249d4f..8544a75c50a6 100644
--- a/test/CodeGen/Mips/msa/f16-llvm-ir.ll
+++ b/test/CodeGen/Mips/msa/f16-llvm-ir.ll
@@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -relocation-model=pic -mtriple=mipsel-- -mcpu=mips32r5 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS32,MIPSR5,MIPS32-O32,MIPS32R5-O32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r5 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N32,MIPS64R5-N32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r5 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR5,MIPS64-N64,MIPS64R5-N64
 
 ; RUN: llc -relocation-model=pic -mtriple=mipsel-- -mcpu=mips32r6 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS32,MIPSR6,MIPSR6-O32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r6 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n32 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N32,MIPSR6-N32
 ; RUN: llc -relocation-model=pic -mtriple=mips64el-- -mcpu=mips64r6 \
-; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 < %s | FileCheck %s \
+; RUN:     -mattr=+fp64,+msa -verify-machineinstrs -target-abi n64 -mips-jalr-reloc=false < %s | FileCheck %s \
 ; RUN:     --check-prefixes=ALL,MIPS64,MIPSR6,MIPS64-N64,MIPSR6-N64
 
 
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index 19eb80b79baf..d9951ebeaf3a 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -relocation-model=pic \
+; RUN:     -mips-jalr-reloc=false < %s | FileCheck %s
 
 %0 = type { i8, i16, i32, i64, double, i32, [4 x i8] }
 %struct.S1 = type { i8, i16, i32, i64, double, i32 }
diff --git a/test/CodeGen/Mips/reloc-jalr.ll b/test/CodeGen/Mips/reloc-jalr.ll
new file mode 100644
index 000000000000..f8fd90311004
--- /dev/null
+++ b/test/CodeGen/Mips/reloc-jalr.ll
@@ -0,0 +1,154 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R2,TAILCALL-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R2,TAILCALL-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips32r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R6,TAILCALL-32R6
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips64r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R6,TAILCALL-64R6
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips32r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R2,TAILCALL-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mcpu=mips64r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R2,TAILCALL-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mattr=+micromips -mcpu=mips32r2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM,TAILCALL-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mattr=+micromips -mcpu=mips32r6 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 < %s | FileCheck %s -check-prefixes=ALL,JALR-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 < %s | FileCheck %s -check-prefixes=ALL,JALR-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips32r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R6
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips64r6 -mips-compact-branches=always < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R6
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips32r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-32R2
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mcpu=mips64r6 -mips-compact-branches=never < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-64R2
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mattr=+micromips -mcpu=mips32r2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mattr=+micromips -mcpu=mips32r6 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,JALR-MM
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mips-tail-calls=1 \
+; RUN:     -O2 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O0 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static -mips-tail-calls=1 \
+; RUN:     -O0 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic -mips-tail-calls=1 \
+; RUN:     -O2 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -mips-tail-calls=1 \
+; RUN:     -O2 -relocation-model=static < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=pic \
+; RUN:     -O0 -mips-jalr-reloc=false < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static \
+; RUN:     -O0 < %s | \
+; RUN:     FileCheck %s -check-prefixes=ALL,NORELOC
+
+define internal void @foo() noinline {
+entry:
+  ret void
+}
+
+define void @checkCall() {
+entry:
+; ALL-LABEL: checkCall:
+  call void @foo()
+;	JALR-32R2: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; JALR-32R2-NEXT: [[TMPLABEL]]:
+;	JALR-32R2-NEXT: 	jalr	$25
+
+;	JALR-64R2: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; JALR-64R2-NEXT: [[TMPLABEL]]:
+;	JALR-64R2-NEXT: 	jalr	$25
+
+;	JALR-MM: 	.reloc ([[TMPLABEL:.*]]), R_MICROMIPS_JALR, foo
+; JALR-MM-NEXT: [[TMPLABEL]]:
+;	JALR-MM-NEXT: 	jalr	$25
+
+;	JALR-32R6: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; JALR-32R6-NEXT: [[TMPLABEL]]:
+;	JALR-32R6-NEXT: 	jalrc	$25
+
+;	JALR-64R6: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; JALR-64R6-NEXT: [[TMPLABEL]]:
+;	JALR-64R6-NEXT: 	jalrc	$25
+
+; NORELOC-NOT: R_MIPS_JALR
+ ret void
+}
+
+define void @checkTailCall() {
+entry:
+; ALL-LABEL: checkTailCall:
+  tail call void @foo()
+;	TAILCALL-32R2: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; TAILCALL-32R2-NEXT: [[TMPLABEL]]:
+;	TAILCALL-32R2-NEXT: 	jr	$25
+
+;	TAILCALL-64R2: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; TAILCALL-64R2-NEXT: [[TMPLABEL]]:
+;	TAILCALL-64R2-NEXT: 	jr	$25
+
+;	TAILCALL-MM: 	.reloc ([[TMPLABEL:.*]]), R_MICROMIPS_JALR, foo
+; TAILCALL-MM-NEXT: [[TMPLABEL]]:
+;	TAILCALL-MM-NEXT: 	jrc	$25
+
+;	TAILCALL-32R6: 	.reloc ([[TMPLABEL:.*]]), R_MIPS_JALR, foo
+; TAILCALL-32R6-NEXT: [[TMPLABEL]]:
+;	TAILCALL-32R6-NEXT: 	jrc	$25
+
+;	TAILCALL-64R6: 	.reloc [[TMPLABEL:.*]], R_MIPS_JALR, foo
+; TAILCALL-64R6-NEXT: [[TMPLABEL]]:
+;	TAILCALL-64R6-NEXT: 	jrc	$25
+
+; NORELOC-NOT: R_MIPS_JALR
+  ret void
+}
diff --git a/test/CodeGen/Mips/shrink-wrapping.ll b/test/CodeGen/Mips/shrink-wrapping.ll
index 54ae8699d1c1..b08d2f1b6467 100644
--- a/test/CodeGen/Mips/shrink-wrapping.ll
+++ b/test/CodeGen/Mips/shrink-wrapping.ll
@@ -9,11 +9,11 @@
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-STATIC
 
 ; RUN: llc -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=true \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=SHRINK-WRAP-PIC
 
 ; RUN: llc -mtriple=mips-unknown-linux-gnu -enable-shrink-wrap=false \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-PIC
 
 ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=true \
@@ -25,11 +25,11 @@
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-64-STATIC
 
 ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=true \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=SHRINK-WRAP-64-PIC
 
 ; RUN: llc -mtriple=mips64-unknown-linux-gnu -enable-shrink-wrap=false \
-; RUN:   -relocation-model=pic < %s | \
+; RUN:   -relocation-model=pic -mips-jalr-reloc=false < %s | \
 ; RUN:   FileCheck %s -check-prefix=NO-SHRINK-WRAP-64-PIC
 
 declare void @f(i32 signext)
diff --git a/test/CodeGen/X86/debug-loclists.ll b/test/CodeGen/X86/debug-loclists.ll
index 20bc0c40378b..0c2ab3dfad5a 100644
--- a/test/CodeGen/X86/debug-loclists.ll
+++ b/test/CodeGen/X86/debug-loclists.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT:               DW_AT_type [DW_FORM_ref4]     (cu + 0x0040 => {0x00000040} "A")
 
 ; CHECK:      .debug_loclists contents:
-; CHECK-NEXT: 0x00000000: locations list header: length = 0x00000017, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
+; CHECK-NEXT: 0x00000000: locations list header: length = 0x00000015, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
 ; CHECK-NEXT: 0x00000000:
 ; CHECK-NEXT:  [0x0000000000000000, 0x0000000000000004): DW_OP_breg5 RDI+0
 ; CHECK-NEXT:  [0x0000000000000004, 0x0000000000000012): DW_OP_breg3 RBX+0
@@ -32,13 +32,13 @@
 ; ASM-NEXT:  .byte 4                               # DW_LLE_offset_pair
 ; ASM-NEXT:  .uleb128 .Lfunc_begin0-.Lfunc_begin0  # starting offset
 ; ASM-NEXT:  .uleb128 .Ltmp0-.Lfunc_begin0         # ending offset
-; ASM-NEXT:  .short 2                              # Loc expr size
+; ASM-NEXT:  .byte 2                               # Loc expr size
 ; ASM-NEXT:  .byte 117                             # DW_OP_breg5
 ; ASM-NEXT:  .byte 0                               # 0
 ; ASM-NEXT:  .byte 4                               # DW_LLE_offset_pair
 ; ASM-NEXT:  .uleb128 .Ltmp0-.Lfunc_begin0         # starting offset
 ; ASM-NEXT:  .uleb128 .Ltmp1-.Lfunc_begin0         # ending offset
-; ASM-NEXT:  .short 2                              # Loc expr size
+; ASM-NEXT:  .byte 2                               # Loc expr size
 ; ASM-NEXT:  .byte 115                             # DW_OP_breg3
 ; ASM-NEXT:  .byte 0                               # 0
 ; ASM-NEXT:  .byte 0                               # DW_LLE_end_of_list
diff --git a/test/CodeGen/X86/discriminate-mem-ops.ll b/test/CodeGen/X86/discriminate-mem-ops.ll
index b77a91fafd2c..a30dc22a0d9c 100644
--- a/test/CodeGen/X86/discriminate-mem-ops.ll
+++ b/test/CodeGen/X86/discriminate-mem-ops.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -x86-discriminate-memops  < %s | FileCheck %s
 ;
 ; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling:
 ; int sum(int* arr, int pos1, int pos2) {
diff --git a/test/CodeGen/X86/insert-prefetch-inline.ll b/test/CodeGen/X86/insert-prefetch-inline.ll
index 5f8373f9480c..62c02fa33291 100644
--- a/test/CodeGen/X86/insert-prefetch-inline.ll
+++ b/test/CodeGen/X86/insert-prefetch-inline.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-inline.afdo | FileCheck %s
 ;
 ; Verify we can insert prefetch instructions in code belonging to inlined
 ; functions.
diff --git a/test/CodeGen/X86/insert-prefetch-invalid-instr.ll b/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
index 004fb56a56eb..d0c4ac378b63 100644
--- a/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
+++ b/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-invalid-instr.afdo | FileCheck %s
 ; ModuleID = 'prefetch.cc'
 source_filename = "prefetch.cc"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/CodeGen/X86/insert-prefetch.ll b/test/CodeGen/X86/insert-prefetch.ll
index 9e77772df774..fe0fd9877f19 100644
--- a/test/CodeGen/X86/insert-prefetch.ll
+++ b/test/CodeGen/X86/insert-prefetch.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s
-; RUN: llc < %s -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch.afdo | FileCheck %s
+; RUN: llc < %s -x86-discriminate-memops -prefetch-hints-file=%S/insert-prefetch-other.afdo | FileCheck %s -check-prefix=OTHERS
 ;
 ; original source, compiled with -O3 -gmlt -fdebug-info-for-profiling:
 ; int sum(int* arr, int pos1, int pos2) {
diff --git a/test/DebugInfo/COFF/types-empty-member-fn.ll b/test/DebugInfo/COFF/types-empty-member-fn.ll
new file mode 100644
index 000000000000..87cba9d62099
--- /dev/null
+++ b/test/DebugInfo/COFF/types-empty-member-fn.ll
@@ -0,0 +1,72 @@
+; RUN: llc < %s -filetype=obj | llvm-readobj - -codeview | FileCheck %s
+
+; ModuleID = 'foo.3a1fbbbh-cgu.0'
+source_filename = "foo.3a1fbbbh-cgu.0"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+; Rust source to regenerate:
+; $ cat foo.rs
+; pub struct Foo;
+; impl Foo {
+;     pub fn foo() {}
+; }
+; $ rustc foo.rs --crate-type lib -Cdebuginfo=1 --emit=llvm-ir
+
+; CHECK:      CodeViewTypes [
+; CHECK:        MemberFunction (0x1006) {
+; CHECK-NEXT:     TypeLeafKind: LF_MFUNCTION (0x1009)
+; CHECK-NEXT:     ReturnType: void (0x3)
+; CHECK-NEXT:     ClassType: foo::Foo (0x1000)
+; CHECK-NEXT:     ThisType: 0x0
+; CHECK-NEXT:     CallingConvention: NearC (0x0)
+; CHECK-NEXT:     FunctionOptions [ (0x0)
+; CHECK-NEXT:     ]
+; CHECK-NEXT:     NumParameters: 0
+; CHECK-NEXT:     ArgListType: () (0x1005)
+; CHECK-NEXT:     ThisAdjustment: 0
+; CHECK-NEXT:   }
+; CHECK-NEXT:   MemberFuncId (0x1007) {
+; CHECK-NEXT:     TypeLeafKind: LF_MFUNC_ID (0x1602)
+; CHECK-NEXT:     ClassType: foo::Foo (0x1000)
+; CHECK-NEXT:     FunctionType: void foo::Foo::() (0x1006)
+; CHECK-NEXT:     Name: foo
+; CHECK-NEXT:   }
+; CHECK:      CodeViewDebugInfo [
+; CHECK:        FunctionLineTable [
+; CHECK-NEXT:     LinkageName: _ZN3foo3Foo3foo17hc557c2121772885bE
+; CHECK-NEXT:     Flags: 0x0
+; CHECK-NEXT:     CodeSize: 0x1
+; CHECK-NEXT:     FilenameSegment [
+; CHECK-NEXT:       Filename: D:\rust\foo.rs (0x0)
+; CHECK-NEXT:       +0x0 [
+; CHECK-NEXT:         LineNumberStart: 3
+; CHECK-NEXT:         LineNumberEndDelta: 0
+; CHECK-NEXT:         IsStatement: No
+; CHECK-NEXT:       ]
+; CHECK-NEXT:     ]
+; CHECK-NEXT:   ]
+
+; foo::Foo::foo
+; Function Attrs: uwtable
+define void @_ZN3foo3Foo3foo17hc557c2121772885bE() unnamed_addr #0 !dbg !5 {
+start:
+  ret void, !dbg !10
+}
+
+attributes #0 = { uwtable "target-cpu"="x86-64" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !1, producer: "clang LLVM (rustc version 1.33.0-nightly (8b0f0156e 2019-01-22))", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
+!1 = !DIFile(filename: "foo.rs", directory: "D:\5Crust")
+!2 = !{}
+!3 = !{i32 2, !"CodeView", i32 1}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "foo", linkageName: "_ZN3foo3Foo3foo17hc557c2121772885bE", scope: !6, file: !1, line: 3, type: !9, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, templateParams: !2, retainedNodes: !2)
+!6 = !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", scope: !8, file: !7, align: 8, elements: !2, templateParams: !2, identifier: "5105d9fe1a2a3c68518268151b672274")
+!7 = !DIFile(filename: "<unknown>", directory: "")
+!8 = !DINamespace(name: "foo", scope: null)
+!9 = !DISubroutineType(types: !2)
+!10 = !DILocation(line: 3, scope: !5)
diff --git a/test/DebugInfo/Mips/dwarfdump-tls.ll b/test/DebugInfo/Mips/dwarfdump-tls.ll
index 6aa429adb417..8d8af8c5124a 100644
--- a/test/DebugInfo/Mips/dwarfdump-tls.ll
+++ b/test/DebugInfo/Mips/dwarfdump-tls.ll
@@ -1,12 +1,34 @@
-; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=obj -o=%t-32.o < %s
+; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=obj \
+; RUN:     -split-dwarf-file=foo.dwo -o=%t-32.o < %s
 ; RUN: llvm-dwarfdump %t-32.o 2>&1 | FileCheck %s
-; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=obj -o=%t-64.o < %s
+; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=obj \
+; RUN:     -split-dwarf-file=foo.dwo -o=%t-64.o < %s
 ; RUN: llvm-dwarfdump %t-64.o 2>&1 | FileCheck %s
 
+; RUN: llc -O0 -march=mips -mcpu=mips32r2 -filetype=asm \
+; RUN:     -split-dwarf-file=foo.dwo < %s | FileCheck -check-prefix=ASM32 %s
+; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 -filetype=asm \
+; RUN:     -split-dwarf-file=foo.dwo < %s | FileCheck -check-prefix=ASM64 %s
+
 @x = thread_local global i32 5, align 4, !dbg !0
 
 ; CHECK-NOT: error: failed to compute relocation: R_MIPS_TLS_DTPREL
 
+; CHECK:      DW_AT_name      ("x")
+; CHECK-NEXT: DW_AT_type      (0x00000025 "int")
+; CHECK-NEXT: DW_AT_external  (true)
+; CHECK-NEXT: DW_AT_decl_file (0x01)
+; CHECK-NEXT: DW_AT_decl_line (1)
+; CHECK-NEXT: DW_AT_location  (DW_OP_GNU_const_index 0x0, {{DW_OP_GNU_push_tls_address|DW_OP_form_tls_address}})
+
+; ASM32:              .section        .debug_addr
+; ASM32-NEXT: $addr_table_base0:
+; ASM32-NEXT:         .4byte  x+32768
+
+; ASM64:              .section        .debug_addr
+; ASM64-NEXT: .Laddr_table_base0:
+; ASM64-NEXT:         .8byte  x+32768
+
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!7, !8}
 
diff --git a/test/DebugInfo/X86/dwarfdump-debug-loclists.test b/test/DebugInfo/X86/dwarfdump-debug-loclists.test
index 669607fe557a..32f2482b5117 100644
--- a/test/DebugInfo/X86/dwarfdump-debug-loclists.test
+++ b/test/DebugInfo/X86/dwarfdump-debug-loclists.test
@@ -9,7 +9,7 @@
 # CHECK-NEXT:    [0x0000000000000700, 0x0000000000000710): DW_OP_breg5 RDI+0
 
 # CHECK:      .debug_loclists contents:
-# CHECK-NEXT: 0x00000000: locations list header: length = 0x0000002f, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
+# CHECK-NEXT: 0x00000000: locations list header: length = 0x0000002c, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
 # CHECK-NEXT: 0x00000000:
 # CHECK-NEXT:   [0x0000000000000000, 0x0000000000000010): DW_OP_breg5 RDI+0
 # CHECK-NEXT:   [0x0000000000000530, 0x0000000000000540): DW_OP_breg6 RBP-8, DW_OP_deref
@@ -37,7 +37,7 @@
   .byte  4                       # DW_LLE_offset_pair
   .uleb128 0x0                   #   starting offset
   .uleb128 0x10                  #   ending offset
-  .short  2                      # Loc expr size
+  .byte  2                       # Loc expr size
   .byte  117                     # DW_OP_breg5
   .byte  0                       # 0
   
@@ -47,7 +47,7 @@
   .byte  4                       # DW_LLE_offset_pair
   .uleb128 0x30                  #   starting offset
   .uleb128 0x40                  #   ending offset
-  .short  3                      # Loc expr size
+  .byte  3                       # Loc expr size
   .byte  118                     # DW_OP_breg6
   .byte  120                     # -8
   .byte  6                       # DW_OP_deref
@@ -55,7 +55,7 @@
   .byte  8                       # DW_LLE_start_length
   .quad  0x700                   # Some address
   .uleb128 0x10                  #   length
-  .short  2                      # Loc expr size
+  .byte  2                       # Loc expr size
   .byte  117                     # DW_OP_breg5
   .byte  0                       # 0
   
diff --git a/test/Transforms/FunctionImport/Inputs/comdat.ll b/test/Transforms/FunctionImport/Inputs/comdat.ll
new file mode 100644
index 000000000000..1df6f25351e5
--- /dev/null
+++ b/test/Transforms/FunctionImport/Inputs/comdat.ll
@@ -0,0 +1,10 @@
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+define void @main() {
+entry:
+  call i8* @lwt_fun()
+  ret void
+}
+
+declare i8* @lwt_fun()
diff --git a/test/Transforms/FunctionImport/comdat.ll b/test/Transforms/FunctionImport/comdat.ll
new file mode 100644
index 000000000000..29e8cb538ab6
--- /dev/null
+++ b/test/Transforms/FunctionImport/comdat.ll
@@ -0,0 +1,32 @@
+; Test to ensure that comdat is renamed consistently when comdat leader is
+; promoted and renamed due to an import. Required by COFF.
+
+; REQUIRES: x86-registered-target
+
+; RUN: opt -thinlto-bc -o %t1.bc %s
+; RUN: opt -thinlto-bc -o %t2.bc %S/Inputs/comdat.ll
+; RUN: llvm-lto2 run -save-temps -o %t3 %t1.bc %t2.bc \
+; RUN:          -r %t1.bc,lwt_fun,plx \
+; RUN:          -r %t2.bc,main,plx \
+; RUN:          -r %t2.bc,lwt_fun,
+; RUN: llvm-dis -o - %t3.1.3.import.bc | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.0.24215"
+
+; CHECK: $lwt.llvm.[[HASH:[0-9]+]] = comdat any
+$lwt = comdat any
+
+; CHECK: @lwt_aliasee = private unnamed_addr global {{.*}}, comdat($lwt.llvm.[[HASH]])
+@lwt_aliasee = private unnamed_addr global [1 x i8*] [i8* null], comdat($lwt)
+
+; CHECK: @lwt.llvm.[[HASH]] = hidden unnamed_addr alias
+@lwt = internal unnamed_addr alias [1 x i8*], [1 x i8*]* @lwt_aliasee
+
+; Below function should get imported into other module, resulting in @lwt being
+; promoted and renamed.
+define i8* @lwt_fun() {
+  %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* @lwt, i32 0, i32 0
+  %2 = load i8*, i8** %1
+  ret i8* %2
+}
diff --git a/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll b/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll
new file mode 100644
index 000000000000..77d09ad53f78
--- /dev/null
+++ b/test/Transforms/LoopTransformWarning/enable_and_isvectorized.ll
@@ -0,0 +1,33 @@
+; RUN: opt -transform-warning -disable-output < %s 2>&1 | FileCheck -allow-empty %s
+;
+; llvm.org/PR40546
+; Do not warn about about leftover llvm.loop.vectorize.enable for already
+; vectorized loops.
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @test(i32 %n) {
+entry:
+  %cmp = icmp eq i32 %n, 0
+  br i1 %cmp, label %simd.if.end, label %omp.inner.for.body.preheader
+
+omp.inner.for.body.preheader:
+  %wide.trip.count = zext i32 %n to i64
+  br label %omp.inner.for.body
+
+omp.inner.for.body:
+  %indvars.iv = phi i64 [ 0, %omp.inner.for.body.preheader ], [ %indvars.iv.next, %omp.inner.for.body ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %simd.if.end, label %omp.inner.for.body, !llvm.loop !0
+
+simd.if.end:
+  ret void
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
+!2 = !{!"llvm.loop.isvectorized"}
+
+
+; CHECK-NOT: loop not vectorized
diff --git a/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll b/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll
new file mode 100644
index 000000000000..424ef3846224
--- /dev/null
+++ b/test/Transforms/LoopVectorize/no_switch_disable_vectorization.ll
@@ -0,0 +1,95 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -transform-warning -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -transform-warning -S 2>&1 | FileCheck %s -check-prefix=NOANALYSIS
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -transform-warning -pass-remarks-missed='loop-vectorize' -S 2>&1 | FileCheck %s -check-prefix=MOREINFO
+
+; This test is a copy of no_switch.ll, with the "llvm.loop.vectorize.enable" metadata set to false.
+; It tests that vectorization is explicitly disabled and no warnings are emitted.
+
+; CHECK-NOT: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
+; CHECK-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; NOANALYSIS-NOT: remark: {{.*}}
+; NOANALYSIS-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; MOREINFO: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly disabled
+; MOREINFO-NOT: warning: source.cpp:4:5: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
+
+; CHECK: _Z11test_switchPii
+; CHECK-NOT: x i32>
+; CHECK: ret
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind optsize ssp uwtable
+define void @_Z11test_switchPii(i32* nocapture %A, i32 %Length) #0 !dbg !4 {
+entry:
+  %cmp18 = icmp sgt i32 %Length, 0, !dbg !10
+  br i1 %cmp18, label %for.body.preheader, label %for.end, !dbg !10, !llvm.loop !12
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body, !dbg !14
+
+for.body:                                         ; preds = %for.body.preheader, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !14
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !14, !tbaa !16
+  switch i32 %0, label %for.inc [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb3
+  ], !dbg !14
+
+sw.bb:                                            ; preds = %for.body
+  %1 = trunc i64 %indvars.iv to i32, !dbg !20
+  %mul = shl nsw i32 %1, 1, !dbg !20
+  br label %for.inc, !dbg !22
+
+sw.bb3:                                           ; preds = %for.body
+  %2 = trunc i64 %indvars.iv to i32, !dbg !23
+  store i32 %2, i32* %arrayidx, align 4, !dbg !23, !tbaa !16
+  br label %for.inc, !dbg !23
+
+for.inc:                                          ; preds = %sw.bb3, %for.body, %sw.bb
+  %storemerge = phi i32 [ %mul, %sw.bb ], [ 0, %for.body ], [ 0, %sw.bb3 ]
+  store i32 %storemerge, i32* %arrayidx, align 4, !dbg !20, !tbaa !16
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
+  %exitcond = icmp eq i32 %lftr.wideiv, %Length, !dbg !10
+  br i1 %exitcond, label %for.end.loopexit, label %for.body, !dbg !10, !llvm.loop !12
+
+for.end.loopexit:                                 ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void, !dbg !24
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "source.cpp", directory: ".")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "test_switch", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, retainedNodes: !2)
+!5 = !DIFile(filename: "source.cpp", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.5.0"}
+!10 = !DILocation(line: 3, column: 8, scope: !11)
+!11 = distinct !DILexicalBlock(line: 3, column: 3, file: !1, scope: !4)
+!12 = !{!12, !13, !13}
+!13 = !{!"llvm.loop.vectorize.enable", i1 false}
+!14 = !DILocation(line: 4, column: 5, scope: !15)
+!15 = distinct !DILexicalBlock(line: 3, column: 36, file: !1, scope: !11)
+!16 = !{!17, !17, i64 0}
+!17 = !{!"int", !18, i64 0}
+!18 = !{!"omnipotent char", !19, i64 0}
+!19 = !{!"Simple C/C++ TBAA"}
+!20 = !DILocation(line: 6, column: 7, scope: !21)
+!21 = distinct !DILexicalBlock(line: 4, column: 18, file: !1, scope: !15)
+!22 = !DILocation(line: 7, column: 5, scope: !21)
+!23 = !DILocation(line: 9, column: 7, scope: !21)
+!24 = !DILocation(line: 14, column: 1, scope: !4)
diff --git a/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s b/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s
index 07c68ab2618f..0b2ae5f8e7a4 100644
--- a/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s
+++ b/test/tools/llvm-dwarfdump/X86/debug_loclists_startx_length.s
@@ -6,7 +6,7 @@
 # the final version which uses ULEB128 and not the U32.
 
 # CHECK:         .debug_loclists contents:
-# CHECK-NEXT:    0x00000000: locations list header: length = 0x0000000f, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
+# CHECK-NEXT:    0x00000000: locations list header: length = 0x0000000e, version = 0x0005, addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000000
 # CHECK-NEXT:    0x00000000:
 # CHECK-NEXT:    Addr idx 1 (w/ length 16): DW_OP_reg5 RDI
 
@@ -21,7 +21,7 @@
  .byte 3          # DW_LLE_startx_length
  .byte 0x01       # Index
  .uleb128 0x10    # Length
- .short 1         # Loc expr size
+ .byte 1          # Loc expr size
  .byte 85         # DW_OP_reg5
  .byte 0          # DW_LLE_end_of_list
 .Ldebug_loclist_table_end0:
author	Dimitry Andric <dim@FreeBSD.org>	2019-02-05 18:38:58 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2019-02-05 18:38:58 +0000
commit	e79719ce607b6130e41e23dbdc90cc6b4e0401e6 (patch)
tree	47bb93fafd9582425ebb778ec77002c222f64ed4
parent	3edec5c15a78e4abba7eb9102fef3891c84ebdfb (diff)
download	src-test2-e79719ce607b6130e41e23dbdc90cc6b4e0401e6.tar.gz src-test2-e79719ce607b6130e41e23dbdc90cc6b4e0401e6.zip