vendor/llvm/llvm-trunk-r239412

author: Dimitry Andric <dim@FreeBSD.org> 2015-06-09 19:06:30 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2015-06-09 19:06:30 +0000
commit: 85d8b2bbe386bcfe669575d05b61482d7be07e5d (patch)
tree: 1dc5e75ab222a9ead44c699eceafab7a6ca7b310 /lib/Target/ARM
parent: 5a5ac124e1efaf208671f01c46edb15f29ed2a0b (diff)
30 files changed, 1235 insertions, 587 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index d3cc068993e0..9550a3a3cad1 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -16,11 +16,13 @@
 #define LLVM_LIB_TARGET_ARM_ARM_H
 
 #include "llvm/Support/CodeGen.h"
+#include <functional>
 
 namespace llvm {
 
 class ARMAsmPrinter;
 class ARMBaseTargetMachine;
+class Function;
 class FunctionPass;
 class ImmutablePass;
 class MachineInstr;
@@ -38,7 +40,8 @@ FunctionPass *createARMConstantIslandPass();
 FunctionPass *createMLxExpansionPass();
 FunctionPass *createThumb2ITBlockPass();
 FunctionPass *createARMOptimizeBarriersPass();
-FunctionPass *createThumb2SizeReductionPass();
+FunctionPass *createThumb2SizeReductionPass(
+    std::function<bool(const Function &)> Ftor = nullptr);
 
 void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                   ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 04503b89de73..d84f2961d810 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -87,7 +87,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
   const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
   assert(GV && "C++ constructor pointer was not a GlobalValue!");
 
-  const MCExpr *E = MCSymbolRefExpr::Create(GetARMGVSymbol(GV,
+  const MCExpr *E = MCSymbolRefExpr::create(GetARMGVSymbol(GV,
                                                            ARMII::MO_NO_FLAG),
                                             (Subtarget->isTargetELF()
                                              ? MCSymbolRefExpr::VK_ARM_TARGET1
@@ -173,7 +173,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     break;
   }
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol();
+    MO.getMBB()->getSymbol()->print(O, MAI);
     return;
   case MachineOperand::MO_GlobalAddress: {
     const GlobalValue *GV = MO.getGlobal();
@@ -181,7 +181,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
       O << ":lower16:";
     else if (TF & ARMII::MO_HI16)
       O << ":upper16:";
-    O << *GetARMGVSymbol(GV, TF);
+    GetARMGVSymbol(GV, TF)->print(O, MAI);
 
     printOffset(MO.getOffset(), O);
     if (TF == ARMII::MO_PLT)
@@ -189,7 +189,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     break;
   }
   case MachineOperand::MO_ConstantPoolIndex:
-    O << *GetCPISymbol(MO.getIndex());
+    GetCPISymbol(MO.getIndex())->print(O, MAI);
     break;
   }
 }
@@ -467,7 +467,7 @@ emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
     // using NLPs; however, sometimes the types are local to the file.
     // We need to fill in the value for the NLP in those cases.
     OutStreamer.EmitValue(
-        MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()),
+        MCSymbolRefExpr::create(MCSym.getPointer(), OutStreamer.getContext()),
         4 /*size*/);
 }
 
@@ -640,9 +640,13 @@ void ARMAsmPrinter::emitAttributes() {
     if (STI.hasFPARMv8())
       // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
       // FPU, but there are two different names for it depending on the CPU.
-      ATS.emitFPU(STI.hasD16() ? ARM::FK_FPV5_D16 : ARM::FK_FP_ARMV8);
+      ATS.emitFPU(STI.hasD16()
+                  ? (STI.isFPOnlySP() ? ARM::FK_FPV5_SP_D16 : ARM::FK_FPV5_D16)
+                  : ARM::FK_FP_ARMV8);
     else if (STI.hasVFP4())
-      ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV4_D16 : ARM::FK_VFPV4);
+      ATS.emitFPU(STI.hasD16()
+                  ? (STI.isFPOnlySP() ? ARM::FK_FPV4_SP_D16 : ARM::FK_VFPV4_D16)
+                  : ARM::FK_VFPV4);
     else if (STI.hasVFP3())
       ATS.emitFPU(STI.hasD16() ? ARM::FK_VFPV3_D16 : ARM::FK_VFPV3);
     else if (STI.hasVFP2())
@@ -895,7 +899,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
 
   // Create an MCSymbol for the reference.
   const MCExpr *Expr =
-    MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()),
+    MCSymbolRefExpr::create(MCSym, getModifierVariantKind(ACPV->getModifier()),
                             OutContext);
 
   if (ACPV->getPCAdjustment()) {
@@ -903,10 +907,10 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
                                     getFunctionNumber(),
                                     ACPV->getLabelId(),
                                     OutContext);
-    const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext);
+    const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext);
     PCRelExpr =
-      MCBinaryExpr::CreateAdd(PCRelExpr,
-                              MCConstantExpr::Create(ACPV->getPCAdjustment(),
+      MCBinaryExpr::createAdd(PCRelExpr,
+                              MCConstantExpr::create(ACPV->getPCAdjustment(),
                                                      OutContext),
                               OutContext);
     if (ACPV->mustAddCurrentAddress()) {
@@ -914,25 +918,22 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
       // label, so just emit a local label end reference that instead.
       MCSymbol *DotSym = OutContext.createTempSymbol();
       OutStreamer->EmitLabel(DotSym);
-      const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext);
-      PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext);
+      const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
+      PCRelExpr = MCBinaryExpr::createSub(PCRelExpr, DotExpr, OutContext);
     }
-    Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext);
+    Expr = MCBinaryExpr::createSub(Expr, PCRelExpr, OutContext);
   }
   OutStreamer->EmitValue(Expr, Size);
 }
 
-void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
-  unsigned Opcode = MI->getOpcode();
-  int OpNum = 1;
-  if (Opcode == ARM::BR_JTadd)
-    OpNum = 2;
-  else if (Opcode == ARM::BR_JTm)
-    OpNum = 3;
-
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
+void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
+  const MachineOperand &MO1 = MI->getOperand(1);
   unsigned JTI = MO1.getIndex();
 
+  // Make sure the Thumb jump table is 4-byte aligned. This will be a nop for
+  // ARM mode tables.
+  EmitAlignment(2);
+
   // Emit a label for the jump table.
   MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
   OutStreamer->EmitLabel(JTISymbol);
@@ -955,16 +956,16 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
     // LJTI_0_0:
     //    .word (LBB0 - LJTI_0_0)
     //    .word (LBB1 - LJTI_0_0)
-    const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+    const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
 
     if (TM.getRelocationModel() == Reloc::PIC_)
-      Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol,
+      Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol,
                                                                    OutContext),
                                      OutContext);
     // If we're generating a table of Thumb addresses in static relocation
     // model, we need to add one to keep interworking correctly.
     else if (AFI->isThumbFunction())
-      Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(1,OutContext),
+      Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(1,OutContext),
                                      OutContext);
     OutStreamer->EmitValue(Expr, 4);
   }
@@ -972,10 +973,8 @@ void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) {
   OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
 }
 
-void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
-  unsigned Opcode = MI->getOpcode();
-  int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1;
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
+void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) {
+  const MachineOperand &MO1 = MI->getOperand(1);
   unsigned JTI = MO1.getIndex();
 
   MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
@@ -985,51 +984,67 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
   const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
-  unsigned OffsetWidth = 4;
-  if (MI->getOpcode() == ARM::t2TBB_JT) {
-    OffsetWidth = 1;
-    // Mark the jump table as data-in-code.
-    OutStreamer->EmitDataRegion(MCDR_DataRegionJT8);
-  } else if (MI->getOpcode() == ARM::t2TBH_JT) {
-    OffsetWidth = 2;
-    // Mark the jump table as data-in-code.
-    OutStreamer->EmitDataRegion(MCDR_DataRegionJT16);
-  }
 
   for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
     MachineBasicBlock *MBB = JTBBs[i];
-    const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
+    const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(),
                                                           OutContext);
     // If this isn't a TBB or TBH, the entries are direct branch instructions.
-    if (OffsetWidth == 4) {
-      EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B)
+    EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2B)
         .addExpr(MBBSymbolExpr)
         .addImm(ARMCC::AL)
         .addReg(0));
-      continue;
-    }
+  }
+}
+
+void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
+                                        unsigned OffsetWidth) {
+  assert((OffsetWidth == 1 || OffsetWidth == 2) && "invalid tbb/tbh width");
+  const MachineOperand &MO1 = MI->getOperand(1);
+  unsigned JTI = MO1.getIndex();
+
+  MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
+  OutStreamer->EmitLabel(JTISymbol);
+
+  // Emit each entry of the table.
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+  // Mark the jump table as data-in-code.
+  OutStreamer->EmitDataRegion(OffsetWidth == 1 ? MCDR_DataRegionJT8
+                                               : MCDR_DataRegionJT16);
+
+  for (auto MBB : JTBBs) {
+    const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(),
+                                                          OutContext);
     // Otherwise it's an offset from the dispatch instruction. Construct an
     // MCExpr for the entry. We want a value of the form:
-    // (BasicBlockAddr - TableBeginAddr) / 2
+    // (BasicBlockAddr - TBBInstAddr + 4) / 2
     //
     // For example, a TBB table with entries jumping to basic blocks BB0 and BB1
     // would look like:
     // LJTI_0_0:
-    //    .byte (LBB0 - LJTI_0_0) / 2
-    //    .byte (LBB1 - LJTI_0_0) / 2
-    const MCExpr *Expr =
-      MCBinaryExpr::CreateSub(MBBSymbolExpr,
-                              MCSymbolRefExpr::Create(JTISymbol, OutContext),
-                              OutContext);
-    Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext),
+    //    .byte (LBB0 - (LCPI0_0 + 4)) / 2
+    //    .byte (LBB1 - (LCPI0_0 + 4)) / 2
+    // where LCPI0_0 is a label defined just before the TBB instruction using
+    // this table.
+    MCSymbol *TBInstPC = GetCPISymbol(MI->getOperand(0).getImm());
+    const MCExpr *Expr = MCBinaryExpr::createAdd(
+        MCSymbolRefExpr::create(TBInstPC, OutContext),
+        MCConstantExpr::create(4, OutContext), OutContext);
+    Expr = MCBinaryExpr::createSub(MBBSymbolExpr, Expr, OutContext);
+    Expr = MCBinaryExpr::createDiv(Expr, MCConstantExpr::create(2, OutContext),
                                    OutContext);
     OutStreamer->EmitValue(Expr, OffsetWidth);
   }
   // Mark the end of jump table data-in-code region. 32-bit offsets use
   // actual branch instructions here, so we don't mark those as a data-region
   // at all.
-  if (OffsetWidth != 4)
-    OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+  OutStreamer->EmitDataRegion(MCDR_DataRegionEnd);
+
+  // Make sure the next instruction is 2-byte aligned.
+  EmitAlignment(1);
 }
 
 void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
@@ -1212,7 +1227,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
                   : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR
                      : ARM::ADR))
       .addReg(MI->getOperand(0).getReg())
-      .addExpr(MCSymbolRefExpr::Create(CPISymbol, OutContext))
+      .addExpr(MCSymbolRefExpr::create(CPISymbol, OutContext))
       // Add predicate operands.
       .addImm(MI->getOperand(2).getImm())
       .addReg(MI->getOperand(3).getReg()));
@@ -1228,7 +1243,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
                   : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR
                      : ARM::ADR))
       .addReg(MI->getOperand(0).getReg())
-      .addExpr(MCSymbolRefExpr::Create(JTIPICSymbol, OutContext))
+      .addExpr(MCSymbolRefExpr::create(JTIPICSymbol, OutContext))
       // Add predicate operands.
       .addImm(MI->getOperand(2).getImm())
       .addReg(MI->getOperand(3).getReg()));
@@ -1278,7 +1293,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBL)
         // Predicate comes first here.
         .addImm(ARMCC::AL).addReg(0)
-        .addExpr(MCSymbolRefExpr::Create(TRegSym, OutContext)));
+        .addExpr(MCSymbolRefExpr::create(TRegSym, OutContext)));
     return;
   }
   case ARM::BMOVPCRX_CALL: {
@@ -1315,7 +1330,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     const GlobalValue *GV = Op.getGlobal();
     const unsigned TF = Op.getTargetFlags();
     MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
-    const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+    const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
     EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc)
       .addExpr(GVSymExpr)
       // Add predicate operands.
@@ -1332,17 +1347,17 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     unsigned TF = MI->getOperand(1).getTargetFlags();
     const GlobalValue *GV = MI->getOperand(1).getGlobal();
     MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
-    const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+    const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
 
     MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(),
                                      getFunctionNumber(),
                                      MI->getOperand(2).getImm(), OutContext);
-    const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+    const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
     unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4;
     const MCExpr *PCRelExpr =
-      ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr,
-                                      MCBinaryExpr::CreateAdd(LabelSymExpr,
-                                      MCConstantExpr::Create(PCAdj, OutContext),
+      ARMMCExpr::createLower16(MCBinaryExpr::createSub(GVSymExpr,
+                                      MCBinaryExpr::createAdd(LabelSymExpr,
+                                      MCConstantExpr::create(PCAdj, OutContext),
                                       OutContext), OutContext), OutContext);
       TmpInst.addOperand(MCOperand::createExpr(PCRelExpr));
 
@@ -1365,17 +1380,17 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     unsigned TF = MI->getOperand(2).getTargetFlags();
     const GlobalValue *GV = MI->getOperand(2).getGlobal();
     MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
-    const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+    const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext);
 
     MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(),
                                      getFunctionNumber(),
                                      MI->getOperand(3).getImm(), OutContext);
-    const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext);
+    const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext);
     unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4;
     const MCExpr *PCRelExpr =
-        ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr,
-                                   MCBinaryExpr::CreateAdd(LabelSymExpr,
-                                      MCConstantExpr::Create(PCAdj, OutContext),
+        ARMMCExpr::createUpper16(MCBinaryExpr::createSub(GVSymExpr,
+                                   MCBinaryExpr::createAdd(LabelSymExpr,
+                                      MCConstantExpr::create(PCAdj, OutContext),
                                           OutContext), OutContext), OutContext);
       TmpInst.addOperand(MCOperand::createExpr(PCRelExpr));
     // Add predicate operands.
@@ -1501,6 +1516,16 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       EmitGlobalConstant(MCPE.Val.ConstVal);
     return;
   }
+  case ARM::JUMPTABLE_ADDRS:
+    EmitJumpTableAddrs(MI);
+    return;
+  case ARM::JUMPTABLE_INSTS:
+    EmitJumpTableInsts(MI);
+    return;
+  case ARM::JUMPTABLE_TBB:
+  case ARM::JUMPTABLE_TBH:
+    EmitJumpTableTBInst(MI, MI->getOpcode() == ARM::JUMPTABLE_TBB ? 1 : 2);
+    return;
   case ARM::t2BR_JT: {
     // Lower and emit the instruction itself, then the jump table following it.
     EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr)
@@ -1509,37 +1534,19 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       // Add predicate operands.
       .addImm(ARMCC::AL)
       .addReg(0));
-
-    // Output the data for the jump table itself
-    EmitJump2Table(MI);
-    return;
-  }
-  case ARM::t2TBB_JT: {
-    // Lower and emit the instruction itself, then the jump table following it.
-    EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBB)
-      .addReg(ARM::PC)
-      .addReg(MI->getOperand(0).getReg())
-      // Add predicate operands.
-      .addImm(ARMCC::AL)
-      .addReg(0));
-
-    // Output the data for the jump table itself
-    EmitJump2Table(MI);
-    // Make sure the next instruction is 2-byte aligned.
-    EmitAlignment(1);
     return;
   }
+  case ARM::t2TBB_JT:
   case ARM::t2TBH_JT: {
-    // Lower and emit the instruction itself, then the jump table following it.
-    EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::t2TBH)
-      .addReg(ARM::PC)
-      .addReg(MI->getOperand(0).getReg())
-      // Add predicate operands.
-      .addImm(ARMCC::AL)
-      .addReg(0));
-
-    // Output the data for the jump table itself
-    EmitJump2Table(MI);
+    unsigned Opc = MI->getOpcode() == ARM::t2TBB_JT ? ARM::t2TBB : ARM::t2TBH;
+    // Lower and emit the PC label, then the instruction itself.
+    OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
+    EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+                                     .addReg(MI->getOperand(0).getReg())
+                                     .addReg(MI->getOperand(1).getReg())
+                                     // Add predicate operands.
+                                     .addImm(ARMCC::AL)
+                                     .addReg(0));
     return;
   }
   case ARM::tBR_JTr:
@@ -1559,13 +1566,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     if (Opc == ARM::MOVr)
       TmpInst.addOperand(MCOperand::createReg(0));
     EmitToStreamer(*OutStreamer, TmpInst);
-
-    // Make sure the Thumb jump table is 4-byte aligned.
-    if (Opc == ARM::tMOVr)
-      EmitAlignment(2);
-
-    // Output the data for the jump table itself
-    EmitJumpTable(MI);
     return;
   }
   case ARM::BR_JTm: {
@@ -1589,9 +1589,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     TmpInst.addOperand(MCOperand::createImm(ARMCC::AL));
     TmpInst.addOperand(MCOperand::createReg(0));
     EmitToStreamer(*OutStreamer, TmpInst);
-
-    // Output the data for the jump table itself
-    EmitJumpTable(MI);
     return;
   }
   case ARM::BR_JTadd: {
@@ -1606,9 +1603,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       .addReg(0)
       // Add 's' bit operand (always reg0 for this)
       .addReg(0));
-
-    // Output the data for the jump table itself
-    EmitJumpTable(MI);
     return;
   }
   case ARM::SPACE:
@@ -1695,7 +1689,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       .addImm(ARMCC::AL)
       .addReg(0));
 
-    const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext);
+    const MCExpr *SymbolExpr = MCSymbolRefExpr::create(Label, OutContext);
     EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tB)
       .addExpr(SymbolExpr)
       .addImm(ARMCC::AL)
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 7bfb9447818e..a6bc3683c8b9 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -71,8 +71,9 @@ public:
   void emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
                         const MCSubtargetInfo *EndInfo) const override;
 
-  void EmitJumpTable(const MachineInstr *MI);
-  void EmitJump2Table(const MachineInstr *MI);
+  void EmitJumpTableAddrs(const MachineInstr *MI);
+  void EmitJumpTableInsts(const MachineInstr *MI);
+  void EmitJumpTableTBInst(const MachineInstr *MI, unsigned OffsetWidth);
   void EmitInstruction(const MachineInstr *MI) override;
   bool runOnMachineFunction(MachineFunction &F) override;
 
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c5d6b258240a..9c4b4961fe8c 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -627,6 +627,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   case ARM::t2MOVi32imm:
     return 8;
   case ARM::CONSTPOOL_ENTRY:
+  case ARM::JUMPTABLE_INSTS:
+  case ARM::JUMPTABLE_ADDRS:
+  case ARM::JUMPTABLE_TBB:
+  case ARM::JUMPTABLE_TBH:
     // If this machine instr is a constant pool entry, its size is recorded as
     // operand #2.
     return MI->getOperand(2).getImm();
@@ -641,42 +645,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   case ARM::t2Int_eh_sjlj_setjmp:
   case ARM::t2Int_eh_sjlj_setjmp_nofp:
     return 12;
-  case ARM::BR_JTr:
-  case ARM::BR_JTm:
-  case ARM::BR_JTadd:
-  case ARM::tBR_JTr:
-  case ARM::t2BR_JT:
-  case ARM::t2TBB_JT:
-  case ARM::t2TBH_JT: {
-    // These are jumptable branches, i.e. a branch followed by an inlined
-    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
-    // entry is one byte; TBH two byte each.
-    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
-      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
-    unsigned NumOps = MCID.getNumOperands();
-    MachineOperand JTOP =
-      MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
-    unsigned JTI = JTOP.getIndex();
-    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-    assert(MJTI != nullptr);
-    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
-    assert(JTI < JT.size());
-    // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
-    // 4 aligned. The assembler / linker may add 2 byte padding just before
-    // the JT entries.  The size does not include this padding; the
-    // constant islands pass does separate bookkeeping for it.
-    // FIXME: If we know the size of the function is less than (1 << 16) *2
-    // bytes, we can use 16-bit entries instead. Then there won't be an
-    // alignment issue.
-    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
-    unsigned NumEntries = JT[JTI].MBBs.size();
-    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
-      // Make sure the instruction that follows TBB is 2-byte aligned.
-      // FIXME: Constant island pass should insert an "ALIGN" instruction
-      // instead.
-      ++NumEntries;
-    return NumEntries * EntrySize + InstSize;
-  }
   case ARM::SPACE:
     return MI->getOperand(1).getImm();
   }
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 6fa5ad7d0522..f4ec8c67c977 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -180,9 +180,7 @@ namespace {
       MachineInstr *MI;
       MachineInstr *CPEMI;
       MachineBasicBlock *HighWaterMark;
-    private:
       unsigned MaxDisp;
-    public:
       bool NegOk;
       bool IsSoImm;
       bool KnownAlignment;
@@ -216,12 +214,24 @@ namespace {
     };
 
     /// CPEntries - Keep track of all of the constant pool entry machine
-    /// instructions. For each original constpool index (i.e. those that
-    /// existed upon entry to this pass), it keeps a vector of entries.
-    /// Original elements are cloned as we go along; the clones are
-    /// put in the vector of the original element, but have distinct CPIs.
+    /// instructions. For each original constpool index (i.e. those that existed
+    /// upon entry to this pass), it keeps a vector of entries.  Original
+    /// elements are cloned as we go along; the clones are put in the vector of
+    /// the original element, but have distinct CPIs.
+    ///
+    /// The first half of CPEntries contains generic constants, the second half
+    /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
+    /// which vector it will be in here.
     std::vector<std::vector<CPEntry> > CPEntries;
 
+    /// Maps a JT index to the offset in CPEntries containing copies of that
+    /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
+    DenseMap<int, int> JumpTableEntryIndices;
+
+    /// Maps a JT index to the LEA that actually uses the index to calculate its
+    /// base address.
+    DenseMap<int, int> JumpTableUserIndices;
+
     /// ImmBranch - One per immediate branch, keeping the machine instruction
     /// pointer, conditional or unconditional, the max displacement,
     /// and (if isCond is true) the corresponding unconditional branch
@@ -269,7 +279,8 @@ namespace {
     }
 
   private:
-    void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
+    void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
+    void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
     bool BBHasFallthrough(MachineBasicBlock *MBB);
     CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
     unsigned getCPELogAlign(const MachineInstr *CPEMI);
@@ -279,6 +290,7 @@ namespace {
     void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
     void adjustBBOffsetsAfter(MachineBasicBlock *BB);
     bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+    unsigned getCombinedIndex(const MachineInstr *CPEMI);
     int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
     bool findAvailableWater(CPUser&U, unsigned UserOffset,
                             water_iterator &WaterIter);
@@ -301,8 +313,9 @@ namespace {
     bool optimizeThumb2Instructions();
     bool optimizeThumb2Branches();
     bool reorderThumb2JumpTables();
-    unsigned removeDeadDefinitions(MachineInstr *MI, unsigned BaseReg,
-                                   unsigned IdxReg);
+    bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI,
+                              unsigned &DeadSize, bool &CanDeleteLEA,
+                              bool &BaseRegKill);
     bool optimizeThumb2JumpTables();
     MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
                                                   MachineBasicBlock *JTBB);
@@ -413,7 +426,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
   // we put them all at the end of the function.
   std::vector<MachineInstr*> CPEMIs;
   if (!MCP->isEmpty())
-    doInitialPlacement(CPEMIs);
+    doInitialConstPlacement(CPEMIs);
+
+  if (MF->getJumpTableInfo())
+    doInitialJumpTablePlacement(CPEMIs);
 
   /// The next UID to take is the first unused one.
   AFI->initPICLabelUId(CPEMIs.size());
@@ -478,7 +494,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
   for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
     for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
       const CPEntry & CPE = CPEntries[i][j];
-      AFI->recordCPEClone(i, CPE.CPI);
+      if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI())
+        AFI->recordCPEClone(i, CPE.CPI);
     }
   }
 
@@ -488,6 +505,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
   WaterList.clear();
   CPUsers.clear();
   CPEntries.clear();
+  JumpTableEntryIndices.clear();
+  JumpTableUserIndices.clear();
   ImmBranches.clear();
   PushPopMIs.clear();
   T2JumpTables.clear();
@@ -495,10 +514,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
   return MadeChange;
 }
 
-/// doInitialPlacement - Perform the initial placement of the constant pool
-/// entries.  To start with, we put them all at the end of the function.
+/// \brief Perform the initial placement of the regular constant pool entries.
+/// To start with, we put them all at the end of the function.
 void
-ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
+ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
   // Create the basic block to hold the CPE's.
   MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
   MF->push_back(BB);
@@ -556,6 +575,66 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
   DEBUG(BB->dump());
 }
 
+/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH
+/// instructions can be made more efficient if the jump table immediately
+/// follows the instruction, it's best to place them immediately next to their
+/// jumps to begin with. In almost all cases they'll never be moved from that
+/// position.
+void ARMConstantIslands::doInitialJumpTablePlacement(
+    std::vector<MachineInstr *> &CPEMIs) {
+  unsigned i = CPEntries.size();
+  auto MJTI = MF->getJumpTableInfo();
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+
+  MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
+  for (MachineBasicBlock &MBB : *MF) {
+    auto MI = MBB.getLastNonDebugInstr();
+
+    unsigned JTOpcode;
+    switch (MI->getOpcode()) {
+    default:
+      continue;
+    case ARM::BR_JTadd:
+    case ARM::BR_JTr:
+    case ARM::tBR_JTr:
+    case ARM::BR_JTm:
+      JTOpcode = ARM::JUMPTABLE_ADDRS;
+      break;
+    case ARM::t2BR_JT:
+      JTOpcode = ARM::JUMPTABLE_INSTS;
+      break;
+    case ARM::t2TBB_JT:
+      JTOpcode = ARM::JUMPTABLE_TBB;
+      break;
+    case ARM::t2TBH_JT:
+      JTOpcode = ARM::JUMPTABLE_TBH;
+      break;
+    }
+
+    unsigned NumOps = MI->getDesc().getNumOperands();
+    MachineOperand JTOp =
+      MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
+    unsigned JTI = JTOp.getIndex();
+    unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);
+    MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
+    MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
+    MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
+                                  DebugLoc(), TII->get(JTOpcode))
+                              .addImm(i++)
+                              .addJumpTableIndex(JTI)
+                              .addImm(Size);
+    CPEMIs.push_back(CPEMI);
+    CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
+    JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));
+    if (!LastCorrectlyNumberedBB)
+      LastCorrectlyNumberedBB = &MBB;
+  }
+
+  // If we did anything then we need to renumber the subsequent blocks.
+  if (LastCorrectlyNumberedBB)
+    MF->RenumberBlocks(LastCorrectlyNumberedBB);
+}
+
 /// BBHasFallthrough - Return true if the specified basic block can fallthrough
 /// into the block immediately after it.
 bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
@@ -595,9 +674,21 @@ ARMConstantIslands::CPEntry
 /// getCPELogAlign - Returns the required alignment of the constant pool entry
 /// represented by CPEMI.  Alignment is measured in log2(bytes) units.
 unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
-  assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+  switch (CPEMI->getOpcode()) {
+  case ARM::CONSTPOOL_ENTRY:
+    break;
+  case ARM::JUMPTABLE_TBB:
+    return 0;
+  case ARM::JUMPTABLE_TBH:
+  case ARM::JUMPTABLE_INSTS:
+    return 1;
+  case ARM::JUMPTABLE_ADDRS:
+    return 2;
+  default:
+    llvm_unreachable("unknown constpool entry kind");
+  }
 
-  unsigned CPI = CPEMI->getOperand(1).getIndex();
+  unsigned CPI = getCombinedIndex(CPEMI);
   assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
   unsigned Align = MCP->getConstants()[CPI].getAlignment();
   assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
@@ -706,12 +797,14 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
       if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
         PushPopMIs.push_back(I);
 
-      if (Opc == ARM::CONSTPOOL_ENTRY)
+      if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
+          Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
+          Opc == ARM::JUMPTABLE_TBH)
         continue;
 
       // Scan the instructions for constant pool operands.
       for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
-        if (I->getOperand(op).isCPI()) {
+        if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) {
           // We found one.  The addressing mode tells us the max displacement
           // from the PC that this instruction permits.
 
@@ -727,6 +820,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
 
           // Taking the address of a CP entry.
           case ARM::LEApcrel:
+          case ARM::LEApcrelJT:
             // This takes a SoImm, which is 8 bit immediate rotated. We'll
             // pretend the maximum offset is 255 * 4. Since each instruction
             // 4 byte wide, this is always correct. We'll check for other
@@ -737,10 +831,12 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
             IsSoImm = true;
             break;
           case ARM::t2LEApcrel:
+          case ARM::t2LEApcrelJT:
             Bits = 12;
             NegOk = true;
             break;
           case ARM::tLEApcrel:
+          case ARM::tLEApcrelJT:
             Bits = 8;
             Scale = 4;
             break;
@@ -768,6 +864,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
 
           // Remember that this is a user of a CP entry.
           unsigned CPI = I->getOperand(op).getIndex();
+          if (I->getOperand(op).isJTI()) {
+            JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
+            CPI = JumpTableEntryIndices[CPI];
+          }
+
           MachineInstr *CPEMI = CPEMIs[CPI];
           unsigned MaxOffs = ((1 << Bits)-1) * Scale;
           CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
@@ -1101,6 +1202,13 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
   return false;
 }
 
+unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
+  if (CPEMI->getOperand(1).isCPI())
+    return CPEMI->getOperand(1).getIndex();
+
+  return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
+}
+
 /// LookForCPEntryInRange - see if the currently referenced CPE is in range;
 /// if not, see if an in-range clone of the CPE is in range, and if so,
 /// change the data structures so the user references the clone.  Returns:
@@ -1120,7 +1228,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
   }
 
   // No.  Look for previously created clones of the CPE that are in range.
-  unsigned CPI = CPEMI->getOperand(1).getIndex();
+  unsigned CPI = getCombinedIndex(CPEMI);
   std::vector<CPEntry> &CPEs = CPEntries[CPI];
   for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
     // We already tried this one
@@ -1365,7 +1473,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
   CPUser &U = CPUsers[CPUserIndex];
   MachineInstr *UserMI = U.MI;
   MachineInstr *CPEMI  = U.CPEMI;
-  unsigned CPI = CPEMI->getOperand(1).getIndex();
+  unsigned CPI = getCombinedIndex(CPEMI);
   unsigned Size = CPEMI->getOperand(2).getImm();
   // Compute this only once, it's expensive.
   unsigned UserOffset = getUserOffset(U);
@@ -1429,17 +1537,17 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
   // Update internal data structures to account for the newly inserted MBB.
   updateForInsertedWaterBlock(NewIsland);
 
-  // Decrement the old entry, and remove it if refcount becomes 0.
-  decrementCPEReferenceCount(CPI, CPEMI);
-
   // Now that we have an island to add the CPE to, clone the original CPE and
   // add it to the island.
   U.HighWaterMark = NewIsland;
-  U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
-                .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+  U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
+                .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
   CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
   ++NumCPEs;
 
+  // Decrement the old entry, and remove it if refcount becomes 0.
+  decrementCPEReferenceCount(CPI, CPEMI);
+
   // Mark the basic block as aligned as required by the const-pool entry.
   NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
 
@@ -1844,77 +1952,120 @@ bool ARMConstantIslands::optimizeThumb2Branches() {
   return MadeChange;
 }
 
-/// If we've formed a TBB or TBH instruction, the base register is now
-/// redundant. In most cases, the instructions defining it will now be dead and
-/// can be tidied up. This function removes them if so, and returns the number
-/// of bytes saved.
-unsigned ARMConstantIslands::removeDeadDefinitions(MachineInstr *MI,
-                                                   unsigned BaseReg,
-                                                   unsigned IdxReg) {
-  unsigned BytesRemoved = 0;
-  MachineBasicBlock *MBB = MI->getParent();
+static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
+                              unsigned BaseReg) {
+  if (I.getOpcode() != ARM::t2ADDrs)
+    return false;
 
-  // Scan backwards to find the instruction that defines the base
-  // register. Due to post-RA scheduling, we can't count on it
-  // immediately preceding the branch instruction.
-  MachineBasicBlock::iterator PrevI = MI;
-  MachineBasicBlock::iterator B = MBB->begin();
-  while (PrevI != B && !PrevI->definesRegister(BaseReg))
-    --PrevI;
+  if (I.getOperand(0).getReg() != EntryReg)
+    return false;
 
-  // If for some reason we didn't find it, we can't do anything, so
-  // just skip this one.
-  if (!PrevI->definesRegister(BaseReg) || PrevI->hasUnmodeledSideEffects() ||
-      PrevI->mayStore())
-    return BytesRemoved;
+  if (I.getOperand(1).getReg() != BaseReg)
+    return false;
 
-  MachineInstr *AddrMI = PrevI;
-  unsigned NewBaseReg = BytesRemoved;
+  // FIXME: what about CC and IdxReg?
+  return true;
+}
 
-  // Examine the instruction that calculates the jumptable entry address.  Make
-  // sure it only defines the base register and kills any uses other than the
-  // index register. We also need precisely one use to trace backwards to
-  // (hopefully) the LEA.
-  for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
-    const MachineOperand &MO = AddrMI->getOperand(k);
-    if (!MO.isReg() || !MO.getReg())
-      continue;
-    if (MO.isDef() && MO.getReg() != BaseReg)
-      return BytesRemoved;
+/// \brief While trying to form a TBB/TBH instruction, we may (if the table
+/// doesn't immediately follow the BR_JT) need access to the start of the
+/// jump-table. We know one instruction that produces such a register; this
+/// function works out whether that definition can be preserved to the BR_JT,
+/// possibly by removing an intervening addition (which is usually needed to
+/// calculate the actual entry to jump to).
+bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
+                                              MachineInstr *LEAMI,
+                                              unsigned &DeadSize,
+                                              bool &CanDeleteLEA,
+                                              bool &BaseRegKill) {
+  if (JumpMI->getParent() != LEAMI->getParent())
+    return false;
 
-    if (MO.isUse() && MO.getReg() != IdxReg) {
-      if (!MO.isKill() || (NewBaseReg != 0 && NewBaseReg != MO.getReg()))
-        return BytesRemoved;
-      NewBaseReg = MO.getReg();
+  // Now we hope that we have at least these instructions in the basic block:
+  //     BaseReg = t2LEA ...
+  //     [...]
+  //     EntryReg = t2ADDrs BaseReg, ...
+  //     [...]
+  //     t2BR_JT EntryReg
+  //
+  // We have to be very conservative about what we recognise here though. The
+  // main perturbing factors to watch out for are:
+  //    + Spills at any point in the chain: not direct problems but we would
+  //      expect a blocking Def of the spilled register so in practice what we
+  //      can do is limited.
+  //    + EntryReg == BaseReg: this is the one situation we should allow a Def
+  //      of BaseReg, but only if the t2ADDrs can be removed.
+  //    + Some instruction other than t2ADDrs computing the entry. Not seen in
+  //      the wild, but we should be careful.
+  unsigned EntryReg = JumpMI->getOperand(0).getReg();
+  unsigned BaseReg = LEAMI->getOperand(0).getReg();
+
+  CanDeleteLEA = true;
+  BaseRegKill = false;
+  MachineInstr *RemovableAdd = nullptr;
+  MachineBasicBlock::iterator I(LEAMI);
+  for (++I; &*I != JumpMI; ++I) {
+    if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
+      RemovableAdd = &*I;
+      break;
+    }
+
+    for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+      const MachineOperand &MO = I->getOperand(K);
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      if (MO.isDef() && MO.getReg() == BaseReg)
+        return false;
+      if (MO.isUse() && MO.getReg() == BaseReg) {
+        BaseRegKill = BaseRegKill || MO.isKill();
+        CanDeleteLEA = false;
+      }
     }
   }
 
-  // Want to continue searching for AddrMI, but there are 2 problems: AddrMI is
-  // going away soon, and even decrementing once may be invalid.
-  if (PrevI != B)
-    PrevI = std::prev(PrevI);
+  if (!RemovableAdd)
+    return true;
 
-  DEBUG(dbgs() << "remove addr: " << *AddrMI);
-  BytesRemoved += TII->GetInstSizeInBytes(AddrMI);
-  AddrMI->eraseFromParent();
+  // Check the add really is removable, and that nothing else in the block
+  // clobbers BaseReg.
+  for (++I; &*I != JumpMI; ++I) {
+    for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
+      const MachineOperand &MO = I->getOperand(K);
+      if (!MO.isReg() || !MO.getReg())
+        continue;
+      if (MO.isDef() && MO.getReg() == BaseReg)
+        return false;
+      if (MO.isUse() && MO.getReg() == EntryReg)
+        RemovableAdd = nullptr;
+    }
+  }
 
-  // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
-  // that gave us the initial base register definition.
-  for (; PrevI != B && !PrevI->definesRegister(NewBaseReg); --PrevI)
-    ;
+  if (RemovableAdd) {
+    RemovableAdd->eraseFromParent();
+    DeadSize += 4;
+  } else if (BaseReg == EntryReg) {
+    // The add wasn't removable, but clobbered the base for the TBB. So we can't
+    // preserve it.
+    return false;
+  }
 
-  // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
-  // to delete it as well.
-  MachineInstr *LeaMI = PrevI;
-  if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
-       LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
-      LeaMI->getOperand(0).getReg() != NewBaseReg)
-    return BytesRemoved;
+  // We reached the end of the block without seeing another definition of
+  // BaseReg (except, possibly the t2ADDrs, which was removed). BaseReg can be
+  // used in the TBB/TBH if necessary.
+  return true;
+}
 
-  DEBUG(dbgs() << "remove lea: " << *LeaMI);
-  BytesRemoved += TII->GetInstSizeInBytes(LeaMI);
-  LeaMI->eraseFromParent();
-  return BytesRemoved;
+/// \brief Returns whether CPEMI is the first instruction in the block
+/// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
+/// we can switch the first register to PC and usually remove the address
+/// calculation that preceeded it.
+static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
+  MachineFunction::iterator MBB = JTMI->getParent();
+  MachineFunction *MF = MBB->getParent();
+  ++MBB;
+
+  return MBB != MF->end() && MBB->begin() != MBB->end() &&
+         &*MBB->begin() == CPEMI;
 }
 
 /// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
@@ -1955,37 +2106,79 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
         break;
     }
 
-    if (ByteOk || HalfWordOk) {
-      MachineBasicBlock *MBB = MI->getParent();
-      unsigned BaseReg = MI->getOperand(0).getReg();
-      bool BaseRegKill = MI->getOperand(0).isKill();
-      if (!BaseRegKill)
-        continue;
-      unsigned IdxReg = MI->getOperand(1).getReg();
-      bool IdxRegKill = MI->getOperand(1).isKill();
+    if (!ByteOk && !HalfWordOk)
+      continue;
+
+    MachineBasicBlock *MBB = MI->getParent();
+    if (!MI->getOperand(0).isKill()) // FIXME: needed now?
+      continue;
+    unsigned IdxReg = MI->getOperand(1).getReg();
+    bool IdxRegKill = MI->getOperand(1).isKill();
 
-      DEBUG(dbgs() << "Shrink JT: " << *MI);
-      unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
-      MachineBasicBlock::iterator MI_JT = MI;
-      MachineInstr *NewJTMI =
+    CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
+    unsigned DeadSize = 0;
+    bool CanDeleteLEA = false;
+    bool BaseRegKill = false;
+    bool PreservedBaseReg =
+        preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
+
+    if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
+      continue;
+
+    DEBUG(dbgs() << "Shrink JT: " << *MI);
+    MachineInstr *CPEMI = User.CPEMI;
+    unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+    MachineBasicBlock::iterator MI_JT = MI;
+    MachineInstr *NewJTMI =
         BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
-        .addReg(IdxReg, getKillRegState(IdxRegKill))
-        .addJumpTableIndex(JTI, JTOP.getTargetFlags());
-      DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
-      // FIXME: Insert an "ALIGN" instruction to ensure the next instruction
-      // is 2-byte aligned. For now, asm printer will fix it up.
-      unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
-      unsigned OrigSize = TII->GetInstSizeInBytes(MI);
-      unsigned DeadSize = removeDeadDefinitions(MI, BaseReg, IdxReg);
-      MI->eraseFromParent();
+            .addReg(User.MI->getOperand(0).getReg(),
+                    getKillRegState(BaseRegKill))
+            .addReg(IdxReg, getKillRegState(IdxRegKill))
+            .addJumpTableIndex(JTI, JTOP.getTargetFlags())
+            .addImm(CPEMI->getOperand(0).getImm());
+    DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
 
-      int delta = OrigSize - NewSize + DeadSize;
-      BBInfo[MBB->getNumber()].Size -= delta;
-      adjustBBOffsetsAfter(MBB);
+    unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
+    CPEMI->setDesc(TII->get(JTOpc));
 
-      ++NumTBs;
-      MadeChange = true;
+    if (jumpTableFollowsTB(MI, User.CPEMI)) {
+      NewJTMI->getOperand(0).setReg(ARM::PC);
+      NewJTMI->getOperand(0).setIsKill(false);
+
+      if (CanDeleteLEA)  {
+        User.MI->eraseFromParent();
+        DeadSize += 4;
+
+        // The LEA was eliminated, the TBB instruction becomes the only new user
+        // of the jump table.
+        User.MI = NewJTMI;
+        User.MaxDisp = 4;
+        User.NegOk = false;
+        User.IsSoImm = false;
+        User.KnownAlignment = false;
+      } else {
+        // The LEA couldn't be eliminated, so we must add another CPUser to
+        // record the TBB or TBH use.
+        int CPEntryIdx = JumpTableEntryIndices[JTI];
+        auto &CPEs = CPEntries[CPEntryIdx];
+        auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
+          return E.CPEMI == User.CPEMI;
+        });
+        ++Entry->RefCount;
+        CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
+      }
     }
+
+    unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
+    unsigned OrigSize = TII->GetInstSizeInBytes(MI);
+    MI->eraseFromParent();
+
+    int Delta = OrigSize - NewSize + DeadSize;
+    BBInfo[MBB->getNumber()].Size -= Delta;
+    adjustBBOffsetsAfter(MBB);
+
+    ++NumTBs;
+    MadeChange = true;
   }
 
   return MadeChange;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 4405625e47cd..50afb192b331 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
 #include "ARMBaseInstrInfo.h"
 #include "ARMTargetMachine.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -251,6 +252,9 @@ private:
   // Select special operations if node forms integer ABS pattern
   SDNode *SelectABSOp(SDNode *N);
 
+  SDNode *SelectReadRegister(SDNode *N);
+  SDNode *SelectWriteRegister(SDNode *N);
+
   SDNode *SelectInlineAsm(SDNode *N);
 
   SDNode *SelectConcatVector(SDNode *N);
@@ -2457,6 +2461,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
 
   switch (N->getOpcode()) {
   default: break;
+  case ISD::WRITE_REGISTER: {
+    SDNode *ResNode = SelectWriteRegister(N);
+    if (ResNode)
+      return ResNode;
+    break;
+  }
+  case ISD::READ_REGISTER: {
+    SDNode *ResNode = SelectReadRegister(N);
+    if (ResNode)
+      return ResNode;
+    break;
+  }
   case ISD::INLINEASM: {
     SDNode *ResNode = SelectInlineAsm(N);
     if (ResNode)
@@ -3336,6 +3352,418 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   return SelectCode(N);
 }
 
+// Inspect a register string of the form
+// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
+// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
+// and obtain the integer operands from them, adding these operands to the
+// provided vector.
+static void getIntOperandsFromRegisterString(StringRef RegString,
+                                             SelectionDAG *CurDAG, SDLoc DL,
+                                             std::vector<SDValue>& Ops) {
+  SmallVector<StringRef, 5> Fields;
+  RegString.split(Fields, ":");
+
+  if (Fields.size() > 1) {
+    bool AllIntFields = true;
+
+    for (StringRef Field : Fields) {
+      // Need to trim out leading 'cp' characters and get the integer field.
+      unsigned IntField;
+      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
+      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
+    }
+
+    assert(AllIntFields &&
+            "Unexpected non-integer value in special register string.");
+  }
+}
+
+// Maps a Banked Register string to its mask value. The mask value returned is
+// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
+// mask operand, which expresses which register is to be used, e.g. r8, and in
+// which mode it is to be used, e.g. usr. Returns -1 to signify that the string
+// was invalid.
+static inline int getBankedRegisterMask(StringRef RegString) {
+  return StringSwitch<int>(RegString.lower())
+          .Case("r8_usr", 0x00)
+          .Case("r9_usr", 0x01)
+          .Case("r10_usr", 0x02)
+          .Case("r11_usr", 0x03)
+          .Case("r12_usr", 0x04)
+          .Case("sp_usr", 0x05)
+          .Case("lr_usr", 0x06)
+          .Case("r8_fiq", 0x08)
+          .Case("r9_fiq", 0x09)
+          .Case("r10_fiq", 0x0a)
+          .Case("r11_fiq", 0x0b)
+          .Case("r12_fiq", 0x0c)
+          .Case("sp_fiq", 0x0d)
+          .Case("lr_fiq", 0x0e)
+          .Case("lr_irq", 0x10)
+          .Case("sp_irq", 0x11)
+          .Case("lr_svc", 0x12)
+          .Case("sp_svc", 0x13)
+          .Case("lr_abt", 0x14)
+          .Case("sp_abt", 0x15)
+          .Case("lr_und", 0x16)
+          .Case("sp_und", 0x17)
+          .Case("lr_mon", 0x1c)
+          .Case("sp_mon", 0x1d)
+          .Case("elr_hyp", 0x1e)
+          .Case("sp_hyp", 0x1f)
+          .Case("spsr_fiq", 0x2e)
+          .Case("spsr_irq", 0x30)
+          .Case("spsr_svc", 0x32)
+          .Case("spsr_abt", 0x34)
+          .Case("spsr_und", 0x36)
+          .Case("spsr_mon", 0x3c)
+          .Case("spsr_hyp", 0x3e)
+          .Default(-1);
+}
+
+// Maps a MClass special register string to its value for use in the
+// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
+// Returns -1 to signify that the string was invalid.
+static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
+  return StringSwitch<int>(RegString.lower())
+          .Case("apsr", 0x0)
+          .Case("iapsr", 0x1)
+          .Case("eapsr", 0x2)
+          .Case("xpsr", 0x3)
+          .Case("ipsr", 0x5)
+          .Case("epsr", 0x6)
+          .Case("iepsr", 0x7)
+          .Case("msp", 0x8)
+          .Case("psp", 0x9)
+          .Case("primask", 0x10)
+          .Case("basepri", 0x11)
+          .Case("basepri_max", 0x12)
+          .Case("faultmask", 0x13)
+          .Case("control", 0x14)
+          .Default(-1);
+}
+
+// The flags here are common to those allowed for apsr in the A class cores and
+// those allowed for the special registers in the M class cores. Returns a
+// value representing which flags were present, -1 if invalid.
+static inline int getMClassFlagsMask(StringRef Flags) {
+  if (Flags.empty())
+    return 0x3;
+
+  return StringSwitch<int>(Flags)
+          .Case("g", 0x1)
+          .Case("nzcvq", 0x2)
+          .Case("nzcvqg", 0x3)
+          .Default(-1);
+}
+
+static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
+                                 const ARMSubtarget *Subtarget) {
+  // Ensure that the register (without flags) was a valid M Class special
+  // register.
+  int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
+  if (SYSmvalue == -1)
+    return -1;
+
+  // basepri, basepri_max and faultmask are only valid for V7m.
+  if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
+    return -1;
+
+  // If it was a read then we won't be expecting flags and so at this point
+  // we can return the mask.
+  if (IsRead) {
+    assert (Flags.empty() && "Unexpected flags for reading M class register.");
+    return SYSmvalue;
+  }
+
+  // We know we are now handling a write so need to get the mask for the flags.
+  int Mask = getMClassFlagsMask(Flags);
+
+  // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
+  // shouldn't have flags present.
+  if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
+    return -1;
+
+  // The _g and _nzcvqg versions are only valid if the DSP extension is
+  // available.
+  if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
+    return -1;
+
+  // The register was valid so need to put the mask in the correct place
+  // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
+  // construct the operand for the instruction node.
+  if (SYSmvalue < 0x4)
+    return SYSmvalue | Mask << 10;
+
+  return SYSmvalue;
+}
+
+static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
+  // The mask operand contains the special register (R Bit) in bit 4, whether
+  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
+  // bits 3-0 contains the fields to be accessed in the special register, set by
+  // the flags provided with the register.
+  int Mask = 0;
+  if (Reg == "apsr") {
+    // The flags permitted for apsr are the same flags that are allowed in
+    // M class registers. We get the flag value and then shift the flags into
+    // the correct place to combine with the mask.
+    Mask = getMClassFlagsMask(Flags);
+    if (Mask == -1)
+      return -1;
+    return Mask << 2;
+  }
+
+  if (Reg != "cpsr" && Reg != "spsr") {
+    return -1;
+  }
+
+  // This is the same as if the flags were "fc"
+  if (Flags.empty() || Flags == "all")
+    return Mask | 0x9;
+
+  // Inspect the supplied flags string and set the bits in the mask for
+  // the relevant and valid flags allowed for cpsr and spsr.
+  for (char Flag : Flags) {
+    int FlagVal;
+    switch (Flag) {
+      case 'c':
+        FlagVal = 0x1;
+        break;
+      case 'x':
+        FlagVal = 0x2;
+        break;
+      case 's':
+        FlagVal = 0x4;
+        break;
+      case 'f':
+        FlagVal = 0x8;
+        break;
+      default:
+        FlagVal = 0;
+    }
+
+    // This avoids allowing strings where the same flag bit appears twice.
+    if (!FlagVal || (Mask & FlagVal))
+      return -1;
+    Mask |= FlagVal;
+  }
+
+  // If the register is spsr then we need to set the R bit.
+  if (Reg == "spsr")
+    Mask |= 0x10;
+
+  return Mask;
+}
+
+// Lower the read_register intrinsic to ARM specific DAG nodes
+// using the supplied metadata string to select the instruction node to use
+// and the registers/masks to construct as operands for the node.
+SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
+  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
+  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+  bool IsThumb2 = Subtarget->isThumb2();
+  SDLoc DL(N);
+
+  std::vector<SDValue> Ops;
+  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
+
+  if (!Ops.empty()) {
+    // If the special register string was constructed of fields (as defined
+    // in the ACLE) then need to lower to MRC node (32 bit) or
+    // MRRC node(64 bit), we can make the distinction based on the number of
+    // operands we have.
+    unsigned Opcode;
+    SmallVector<EVT, 3> ResTypes;
+    if (Ops.size() == 5){
+      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
+      ResTypes.append({ MVT::i32, MVT::Other });
+    } else {
+      assert(Ops.size() == 3 &&
+              "Invalid number of fields in special register string.");
+      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
+      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
+    }
+
+    Ops.push_back(getAL(CurDAG, DL));
+    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+    Ops.push_back(N->getOperand(0));
+    return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
+  }
+
+  std::string SpecialReg = RegString->getString().lower();
+
+  int BankedReg = getBankedRegisterMask(SpecialReg);
+  if (BankedReg != -1) {
+    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
+            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+            N->getOperand(0) };
+    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
+                                  DL, MVT::i32, MVT::Other, Ops);
+  }
+
+  // The VFP registers are read by creating SelectionDAG nodes with opcodes
+  // corresponding to the register that is being read from. So we switch on the
+  // string to find which opcode we need to use.
+  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
+                    .Case("fpscr", ARM::VMRS)
+                    .Case("fpexc", ARM::VMRS_FPEXC)
+                    .Case("fpsid", ARM::VMRS_FPSID)
+                    .Case("mvfr0", ARM::VMRS_MVFR0)
+                    .Case("mvfr1", ARM::VMRS_MVFR1)
+                    .Case("mvfr2", ARM::VMRS_MVFR2)
+                    .Case("fpinst", ARM::VMRS_FPINST)
+                    .Case("fpinst2", ARM::VMRS_FPINST2)
+                    .Default(0);
+
+  // If an opcode was found then we can lower the read to a VFP instruction.
+  if (Opcode) {
+    if (!Subtarget->hasVFP2())
+      return nullptr;
+    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
+      return nullptr;
+
+    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+            N->getOperand(0) };
+    return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
+  }
+
+  // If the target is M Class then need to validate that the register string
+  // is an acceptable value, so check that a mask can be constructed from the
+  // string.
+  if (Subtarget->isMClass()) {
+    int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
+    if (SYSmValue == -1)
+      return nullptr;
+
+    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
+                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+                      N->getOperand(0) };
+    return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
+  }
+
+  // Here we know the target is not M Class so we need to check if it is one
+  // of the remaining possible values which are apsr, cpsr or spsr.
+  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
+    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+            N->getOperand(0) };
+    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
+                                  MVT::i32, MVT::Other, Ops);
+  }
+
+  if (SpecialReg == "spsr") {
+    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+            N->getOperand(0) };
+    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
+                                  DL, MVT::i32, MVT::Other, Ops);
+  }
+
+  return nullptr;
+}
+
+// Lower the write_register intrinsic to ARM specific DAG nodes
+// using the supplied metadata string to select the instruction node to use
+// and the registers/masks to use in the nodes
+SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
+  const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
+  const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+  bool IsThumb2 = Subtarget->isThumb2();
+  SDLoc DL(N);
+
+  std::vector<SDValue> Ops;
+  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
+
+  if (!Ops.empty()) {
+    // If the special register string was constructed of fields (as defined
+    // in the ACLE) then need to lower to MCR node (32 bit) or
+    // MCRR node(64 bit), we can make the distinction based on the number of
+    // operands we have.
+    unsigned Opcode;
+    if (Ops.size() == 5) {
+      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
+      Ops.insert(Ops.begin()+2, N->getOperand(2));
+    } else {
+      assert(Ops.size() == 3 &&
+              "Invalid number of fields in special register string.");
+      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
+      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
+      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
+    }
+
+    Ops.push_back(getAL(CurDAG, DL));
+    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+    Ops.push_back(N->getOperand(0));
+
+    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+  }
+
+  std::string SpecialReg = RegString->getString().lower();
+  int BankedReg = getBankedRegisterMask(SpecialReg);
+  if (BankedReg != -1) {
+    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
+            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+            N->getOperand(0) };
+    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
+                                  DL, MVT::Other, Ops);
+  }
+
+  // The VFP registers are written to by creating SelectionDAG nodes with
+  // opcodes corresponding to the register that is being written. So we switch
+  // on the string to find which opcode we need to use.
+  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
+                    .Case("fpscr", ARM::VMSR)
+                    .Case("fpexc", ARM::VMSR_FPEXC)
+                    .Case("fpsid", ARM::VMSR_FPSID)
+                    .Case("fpinst", ARM::VMSR_FPINST)
+                    .Case("fpinst2", ARM::VMSR_FPINST2)
+                    .Default(0);
+
+  if (Opcode) {
+    if (!Subtarget->hasVFP2())
+      return nullptr;
+    Ops = { N->getOperand(2), getAL(CurDAG, DL),
+            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
+    return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
+  }
+
+  SmallVector<StringRef, 5> Fields;
+  StringRef(SpecialReg).split(Fields, "_", 1, false);
+  std::string Reg = Fields[0].str();
+  StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
+
+  // If the target was M Class then need to validate the special register value
+  // and retrieve the mask for use in the instruction node.
+  if (Subtarget->isMClass()) {
+    // basepri_max gets split so need to correct Reg and Flags.
+    if (SpecialReg == "basepri_max") {
+      Reg = SpecialReg;
+      Flags = "";
+    }
+    int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
+    if (SYSmValue == -1)
+      return nullptr;
+
+    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
+                      N->getOperand(2), getAL(CurDAG, DL),
+                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
+    return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
+  }
+
+  // We then check to see if a valid mask can be constructed for one of the
+  // register string values permitted for the A and R class cores. These values
+  // are apsr, spsr and cpsr; these are also valid on older cores.
+  int Mask = getARClassRegisterMask(Reg, Flags);
+  if (Mask != -1) {
+    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
+            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
+            N->getOperand(0) };
+    return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
+                                  DL, MVT::Other, Ops);
+  }
+
+  return nullptr;
+}
+
 SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
   std::vector<SDValue> AsmNodeOperands;
   unsigned Flag, Kind;
@@ -3492,13 +3920,29 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
 bool ARMDAGToDAGISel::
 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                              std::vector<SDValue> &OutOps) {
-  assert(ConstraintID == InlineAsm::Constraint_m &&
-         "unexpected asm memory constraint");
-  // Require the address to be in a register.  That is safe for all ARM
-  // variants and it is hard to do anything much smarter without knowing
-  // how the operand is used.
-  OutOps.push_back(Op);
-  return false;
+  switch(ConstraintID) {
+  default:
+    llvm_unreachable("Unexpected asm memory constraint");
+  case InlineAsm::Constraint_i:
+    // FIXME: It seems strange that 'i' is needed here since it's supposed to
+    //        be an immediate and not a memory constraint.
+    // Fallthrough.
+  case InlineAsm::Constraint_m:
+  case InlineAsm::Constraint_Q:
+  case InlineAsm::Constraint_Um:
+  case InlineAsm::Constraint_Un:
+  case InlineAsm::Constraint_Uq:
+  case InlineAsm::Constraint_Us:
+  case InlineAsm::Constraint_Ut:
+  case InlineAsm::Constraint_Uv:
+  case InlineAsm::Constraint_Uy:
+    // Require the address to be in a register.  That is safe for all ARM
+    // variants and it is hard to do anything much smarter without knowing
+    // how the operand is used.
+    OutOps.push_back(Op);
+    return false;
+  }
+  return true;
 }
 
 /// createARMISelDag - This pass converts a legalized DAG into a
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 629cc90d67de..47c8400a668f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -426,6 +426,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
   setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
 
+  setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
+  setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
+
   if (Subtarget->hasNEON()) {
     addDRTypeForNEON(MVT::v2f32);
     addDRTypeForNEON(MVT::v8i8);
@@ -2378,6 +2381,24 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
   return !Subtarget->isThumb1Only();
 }
 
+// Trying to write a 64 bit value so need to split into two 32 bit values first,
+// and pass the lower and high parts through.
+static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  SDValue WriteValue = Op->getOperand(2);
+
+  // This function is only supposed to be called for i64 type argument.
+  assert(WriteValue.getValueType() == MVT::i64
+          && "LowerWRITE_REGISTER called for non-i64 type argument.");
+
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
+                           DAG.getConstant(0, DL, MVT::i32));
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
+                           DAG.getConstant(1, DL, MVT::i32));
+  SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
+  return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
+}
+
 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
 // one of the above mentioned nodes. It has to be wrapped because otherwise
@@ -4085,7 +4106,28 @@ unsigned ARMTargetLowering::getRegisterByName(const char* RegName,
                        .Default(0);
   if (Reg)
     return Reg;
-  report_fatal_error("Invalid register name global variable");
+  report_fatal_error(Twine("Invalid register name \""
+                              + StringRef(RegName)  + "\"."));
+}
+
+// Result is 64 bit value so split into two 32 bit values and return as a
+// pair of values.
+static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                SelectionDAG &DAG) {
+  SDLoc DL(N);
+
+  // This function is only supposed to be called for i64 type destination.
+  assert(N->getValueType(0) == MVT::i64
+          && "ExpandREAD_REGISTER called for non-i64 type result.");
+
+  SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
+                             DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
+                             N->getOperand(0),
+                             N->getOperand(1));
+
+  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
+                    Read.getValue(1)));
+  Results.push_back(Read.getOperand(0));
 }
 
 /// ExpandBITCAST - If the target supports VFP, this function is called to
@@ -6355,6 +6397,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Don't know how to custom lower this!");
+  case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
   case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
   case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
   case ISD::GlobalAddress:
@@ -6439,6 +6482,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Don't know how to custom expand this!");
+  case ISD::READ_REGISTER:
+    ExpandREAD_REGISTER(N, Results, DAG);
+    break;
   case ISD::BITCAST:
     Res = ExpandBITCAST(N, DAG);
     break;
@@ -10222,7 +10268,8 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
 /// isLegalAddressingMode - Return true if the addressing mode represented
 /// by AM is legal for this target, for a load/store of the specified type.
 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
-                                              Type *Ty) const {
+                                              Type *Ty,
+                                              unsigned AS) const {
   EVT VT = getValueType(Ty, true);
   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
     return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 63e87c5282d1..c0b329c5a1e5 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -286,7 +286,8 @@ namespace llvm {
 
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
-    bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+    bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+                               unsigned AS) const override;
     bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
 
     /// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -346,8 +347,31 @@ namespace llvm {
 
     unsigned getInlineAsmMemConstraint(
         const std::string &ConstraintCode) const override {
-      // FIXME: Map different constraints differently.
-      return InlineAsm::Constraint_m;
+      if (ConstraintCode == "Q")
+        return InlineAsm::Constraint_Q;
+      else if (ConstraintCode.size() == 2) {
+        if (ConstraintCode[0] == 'U') {
+          switch(ConstraintCode[1]) {
+          default:
+            break;
+          case 'm':
+            return InlineAsm::Constraint_Um;
+          case 'n':
+            return InlineAsm::Constraint_Un;
+          case 'q':
+            return InlineAsm::Constraint_Uq;
+          case 's':
+            return InlineAsm::Constraint_Us;
+          case 't':
+            return InlineAsm::Constraint_Ut;
+          case 'v':
+            return InlineAsm::Constraint_Uv;
+          case 'y':
+            return InlineAsm::Constraint_Uy;
+          }
+        }
+      }
+      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
     }
 
     const ARMSubtarget* getSubtarget() const {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 778fd17137f6..b8cac135baf6 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -1826,6 +1826,32 @@ def CONSTPOOL_ENTRY :
 PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
                     i32imm:$size), NoItinerary, []>;
 
+/// A jumptable consisting of direct 32-bit addresses of the destination basic
+/// blocks (either absolute, or relative to the start of the jump-table in PIC
+/// mode). Used mostly in ARM and Thumb-1 modes.
+def JUMPTABLE_ADDRS :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+                        i32imm:$size), NoItinerary, []>;
+
+/// A jumptable consisting of 32-bit jump instructions. Used for Thumb-2 tables
+/// that cannot be optimised to use TBB or TBH.
+def JUMPTABLE_INSTS :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+                        i32imm:$size), NoItinerary, []>;
+
+/// A jumptable consisting of 8-bit unsigned integers representing offsets from
+/// a TBB instruction.
+def JUMPTABLE_TBB :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+                        i32imm:$size), NoItinerary, []>;
+
+/// A jumptable consisting of 16-bit unsigned integers representing offsets from
+/// a TBH instruction.
+def JUMPTABLE_TBH :
+PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
+                        i32imm:$size), NoItinerary, []>;
+
+
 // FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
 // from removing one half of the matched pairs. That breaks PEI, which assumes
 // these will always be in pairs, and asserts if it finds otherwise. Better way?
@@ -2224,7 +2250,7 @@ let isBranch = 1, isTerminator = 1 in {
                 [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>,
                 Sched<[WriteBr]>;
 
-    let isNotDuplicable = 1, isIndirectBranch = 1 in {
+    let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
     def BR_JTr : ARMPseudoInst<(outs),
                       (ins GPR:$target, i32imm:$jt),
                       0, IIC_Br,
@@ -5039,10 +5065,11 @@ def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
                               imm:$CRm, imm:$opc2),
                 (MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
 
-class MovRRCopro<string opc, bit direction, list<dag> pattern = []>
-  : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
-        GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm),
-        NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
+class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
+                 pattern = []>
+  : ABI<0b1100, oops, iops, NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm",
+        pattern> {
+
   let Inst{23-21} = 0b010;
   let Inst{20} = direction;
 
@@ -5060,9 +5087,13 @@ class MovRRCopro<string opc, bit direction, list<dag> pattern = []>
 }
 
 def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
+                      (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
+                      GPRnopc:$Rt2, c_imm:$CRm),
                       [(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
                                      GPRnopc:$Rt2, imm:$CRm)]>;
-def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
+def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */,
+                      (outs GPRnopc:$Rt, GPRnopc:$Rt2),
+                      (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>;
 
 class MovRRCopro2<string opc, bit direction, list<dag> pattern = []>
   : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 0fecfa1319d3..40414da3ca81 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -526,6 +526,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
                       0, IIC_Br,
                       [(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
                       Sched<[WriteBrTbl]> {
+    let Size = 2;
     list<Predicate> Predicates = [IsThumb, IsThumb1Only];
   }
 }
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 814b524b2bcb..aba8a7b10fd9 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3531,20 +3531,20 @@ def t2B   : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
   let AsmMatchConverter = "cvtThumbBranches";
 }
 
-let isNotDuplicable = 1, isIndirectBranch = 1 in {
+let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
 def t2BR_JT : t2PseudoInst<(outs),
           (ins GPR:$target, GPR:$index, i32imm:$jt),
            0, IIC_Br,
           [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>,
           Sched<[WriteBr]>;
 
-// FIXME: Add a non-pc based case that can be predicated.
+// FIXME: Add a case that can be predicated.
 def t2TBB_JT : t2PseudoInst<(outs),
-        (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
+        (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
         Sched<[WriteBr]>;
 
 def t2TBH_JT : t2PseudoInst<(outs),
-        (ins GPR:$index, i32imm:$jt), 0, IIC_Br, []>,
+        (ins GPR:$base, GPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
         Sched<[WriteBr]>;
 
 def t2TBB : T2I<(outs), (ins addrmode_tbb:$addr), IIC_Br,
@@ -4141,11 +4141,9 @@ class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
   let Inst{19-16} = CRn;
 }
 
-class t2MovRRCopro<bits<4> Op, string opc, bit direction,
+class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
                    list<dag> pattern = []>
-  : T2Cop<Op, (outs),
-          (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
-          opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
+  : T2Cop<Op, oops, iops, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
   let Inst{27-24} = 0b1100;
   let Inst{23-21} = 0b010;
   let Inst{20} = direction;
@@ -4210,19 +4208,25 @@ def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
 
 
 /* from ARM core register to coprocessor */
-def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
+def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs),
+                         (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
+                         c_imm:$CRm),
                         [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
                                        imm:$CRm)]>;
-def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
-                           [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
-                                           GPR:$Rt2, imm:$CRm)]> {
+def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs),
+                          (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
+                           c_imm:$CRm),
+                          [(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
+                                          GPR:$Rt2, imm:$CRm)]> {
   let Predicates = [IsThumb2, PreV8];
 }
 
 /* from coprocessor to ARM core register */
-def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
+def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1, (outs GPR:$Rt, GPR:$Rt2),
+                          (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm)>;
 
-def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1> {
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2),
+                           (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm)> {
   let Predicates = [IsThumb2, PreV8];
 }
 
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 5b62a21706ce..46ff326ba630 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains a pass that performs load / store related peephole
-// optimizations. This pass should be run after register allocation.
+/// \file This file contains a pass that performs load / store related peephole
+/// optimizations. This pass should be run after register allocation.
 //
 //===----------------------------------------------------------------------===//
 
@@ -58,10 +58,9 @@ STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
 STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
 STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
 
-/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
-/// load / store instructions to form ldm / stm instructions.
-
 namespace {
+  /// Post- register allocation pass the combine load / store instructions to
+  /// form ldm / stm instructions.
   struct ARMLoadStoreOpt : public MachineFunctionPass {
     static char ID;
     ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
@@ -271,10 +270,7 @@ static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
   }
 }
 
-namespace llvm {
-  namespace ARM_AM {
-
-AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
+static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
   switch (Opcode) {
   default: llvm_unreachable("Unhandled opcode!");
   case ARM::LDMIA_RET:
@@ -328,9 +324,6 @@ AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
   }
 }
 
-  } // end namespace ARM_AM
-} // end namespace llvm
-
 static bool isT1i32Load(unsigned Opc) {
   return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
 }
@@ -469,9 +462,9 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
   }
 }
 
-/// MergeOps - Create and insert a LDM or STM with Base as base register and
-/// registers in Regs as the register operands that would be loaded / stored.
-/// It returns true if the transformation is done.
+/// Create and insert a LDM or STM with Base as base register and registers in
+/// Regs as the register operands that would be loaded / stored.  It returns
+/// true if the transformation is done.
 bool
 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
@@ -665,7 +658,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
   return true;
 }
 
-/// \brief Find all instructions using a given imp-def within a range.
+/// Find all instructions using a given imp-def within a range.
 ///
 /// We are trying to combine a range of instructions, one of which (located at
 /// position RangeBegin) implicitly defines a register. The final LDM/STM will
@@ -721,8 +714,7 @@ void ARMLoadStoreOpt::findUsesOfImpDef(
   }
 }
 
-// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
-// success.
+/// Call MergeOps and update MemOps and merges accordingly on success.
 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                      MemOpQueue &memOps,
                                      unsigned memOpsBegin, unsigned memOpsEnd,
@@ -762,10 +754,10 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
     Regs.push_back(std::make_pair(Reg, isKill));
 
     // Collect any implicit defs of super-registers. They must be preserved.
-    for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
-      if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
+    for (const MachineOperand &MO : memOps[i].MBBI->operands()) {
+      if (!MO.isReg() || !MO.isDef() || !MO.isImplicit() || MO.isDead())
         continue;
-      unsigned DefReg = MO->getReg();
+      unsigned DefReg = MO.getReg();
       if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
         ImpDefs.push_back(DefReg);
 
@@ -823,8 +815,8 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
   }
 }
 
-/// MergeLDR_STR - Merge a number of load / store instructions into one or more
-/// load / store multiple instructions.
+/// Merge a number of load / store instructions into one or more load / store
+/// multiple instructions.
 void
 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, unsigned Opcode, unsigned Size,
@@ -1083,8 +1075,8 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
   }
 }
 
-/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
-/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
+/// Fold proceeding/trailing inc/dec of base register into the
+/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
 ///
 /// stmia rn, <ra, rb, rc>
 /// rn := rn + 4 * 3;
@@ -1118,7 +1110,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
       return false;
 
   bool DoMerge = false;
-  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
+  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
 
   // Try merging with the previous instruction.
   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
@@ -1231,8 +1223,8 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
   }
 }
 
-/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
-/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
+/// Fold proceeding/trailing inc/dec of base register into the
+/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator MBBI,
                                                const TargetInstrInfo *TII,
@@ -1373,8 +1365,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
   return true;
 }
 
-/// isMemoryOp - Returns true if instruction is a memory operation that this
-/// pass is capable of operating on.
+/// Returns true if instruction is a memory operation that this pass is capable
+/// of operating on.
 static bool isMemoryOp(const MachineInstr *MI) {
   // When no memory operands are present, conservatively assume unaligned,
   // volatile, unfoldable.
@@ -1428,8 +1420,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
   return false;
 }
 
-/// AdvanceRS - Advance register scavenger to just before the earliest memory
-/// op that is being merged.
+/// Advance register scavenger to just before the earliest memory op that is
+/// being merged.
 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
   MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
   unsigned Position = MemOps[0].Position;
@@ -1472,8 +1464,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator &MBBI) {
   MachineInstr *MI = &*MBBI;
   unsigned Opcode = MI->getOpcode();
-  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
-      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
     const MachineOperand &BaseOp = MI->getOperand(2);
     unsigned BaseReg = BaseOp.getReg();
     unsigned EvenReg = MI->getOperand(0).getReg();
@@ -1588,8 +1579,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
   return false;
 }
 
-/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
-/// ops of the same base and incrementing offset into LDM / STM ops.
+/// An optimization pass to turn multiple LDR / STR ops of the same base and
+/// incrementing offset into LDM / STM ops.
 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   unsigned NumMerges = 0;
   unsigned NumMemOps = 0;
@@ -1770,9 +1761,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   return NumMerges > 0;
 }
 
-/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
-/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
-/// directly restore the value of LR into pc.
+/// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
+/// into the preceding stack restore so it directly restore the value of LR
+/// into pc.
 ///   ldmfd sp!, {..., lr}
 ///   bx lr
 /// or
@@ -1834,12 +1825,9 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   return Modified;
 }
 
-
-/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
-/// load / stores from consecutive locations close to make it more
-/// likely they will be combined later.
-
 namespace {
+  /// Pre- register allocation pass that move load / stores from consecutive
+  /// locations close to make it more likely they will be combined later.
   struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
     static char ID;
     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
@@ -1936,7 +1924,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
 }
 
 
-/// Copy Op0 and Op1 operands into a new array assigned to MI.
+/// Copy \p Op0 and \p Op1 operands into a new array assigned to MI.
 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
                                    MachineInstr *Op1) {
   assert(MI->memoperands_empty() && "expected a new machineinstr");
@@ -1954,10 +1942,11 @@ static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
 
 bool
 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
-                                          DebugLoc &dl,
-                                          unsigned &NewOpc, unsigned &EvenReg,
-                                          unsigned &OddReg, unsigned &BaseReg,
-                                          int &Offset, unsigned &PredReg,
+                                          DebugLoc &dl, unsigned &NewOpc,
+                                          unsigned &FirstReg,
+                                          unsigned &SecondReg,
+                                          unsigned &BaseReg, int &Offset,
+                                          unsigned &PredReg,
                                           ARMCC::CondCodes &Pred,
                                           bool &isT2) {
   // Make sure we're allowed to generate LDRD/STRD.
@@ -2016,9 +2005,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
       return false;
     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
   }
-  EvenReg = Op0->getOperand(0).getReg();
-  OddReg  = Op1->getOperand(0).getReg();
-  if (EvenReg == OddReg)
+  FirstReg = Op0->getOperand(0).getReg();
+  SecondReg = Op1->getOperand(0).getReg();
+  if (FirstReg == SecondReg)
     return false;
   BaseReg = Op0->getOperand(1).getReg();
   Pred = getInstrPredicate(Op0, PredReg);
@@ -2114,7 +2103,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
         // to try to allocate a pair of registers that can form register pairs.
         MachineInstr *Op0 = Ops.back();
         MachineInstr *Op1 = Ops[Ops.size()-2];
-        unsigned EvenReg = 0, OddReg = 0;
+        unsigned FirstReg = 0, SecondReg = 0;
         unsigned BaseReg = 0, PredReg = 0;
         ARMCC::CondCodes Pred = ARMCC::AL;
         bool isT2 = false;
@@ -2122,21 +2111,21 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
         int Offset = 0;
         DebugLoc dl;
         if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
-                                             EvenReg, OddReg, BaseReg,
+                                             FirstReg, SecondReg, BaseReg,
                                              Offset, PredReg, Pred, isT2)) {
           Ops.pop_back();
           Ops.pop_back();
 
           const MCInstrDesc &MCID = TII->get(NewOpc);
           const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
-          MRI->constrainRegClass(EvenReg, TRC);
-          MRI->constrainRegClass(OddReg, TRC);
+          MRI->constrainRegClass(FirstReg, TRC);
+          MRI->constrainRegClass(SecondReg, TRC);
 
           // Form the pair instruction.
           if (isLd) {
             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
-              .addReg(EvenReg, RegState::Define)
-              .addReg(OddReg, RegState::Define)
+              .addReg(FirstReg, RegState::Define)
+              .addReg(SecondReg, RegState::Define)
               .addReg(BaseReg);
             // FIXME: We're converting from LDRi12 to an insn that still
             // uses addrmode2, so we need an explicit offset reg. It should
@@ -2149,8 +2138,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
             ++NumLDRDFormed;
           } else {
             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
-              .addReg(EvenReg)
-              .addReg(OddReg)
+              .addReg(FirstReg)
+              .addReg(SecondReg)
               .addReg(BaseReg);
             // FIXME: We're converting from LDRi12 to an insn that still
             // uses addrmode2, so we need an explicit offset reg. It should
@@ -2165,9 +2154,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
           MBB->erase(Op0);
           MBB->erase(Op1);
 
-          // Add register allocation hints to form register pairs.
-          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
-          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
+          if (!isT2) {
+            // Add register allocation hints to form register pairs.
+            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
+            MRI->setRegAllocationHint(SecondReg,  ARMRI::RegPairOdd, FirstReg);
+          }
         } else {
           for (unsigned i = 0; i != NumMove; ++i) {
             MachineInstr *Op = Ops.back();
@@ -2292,8 +2283,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
 }
 
 
-/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
-/// optimization pass.
+/// Returns an instance of the load / store optimization pass.
 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
   if (PreAlloc)
     return new ARMPreAllocLoadStoreOpt();
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index e370b962ba7f..a2aca2d1a69e 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -30,35 +30,35 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
   unsigned Option = MO.getTargetFlags() & ARMII::MO_OPTION_MASK;
   switch (Option) {
   default: {
-    Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+    Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
                                    OutContext);
     switch (Option) {
     default: llvm_unreachable("Unknown target flag on symbol operand");
     case ARMII::MO_NO_FLAG:
       break;
     case ARMII::MO_LO16:
-      Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+      Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
                                      OutContext);
-      Expr = ARMMCExpr::CreateLower16(Expr, OutContext);
+      Expr = ARMMCExpr::createLower16(Expr, OutContext);
       break;
     case ARMII::MO_HI16:
-      Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+      Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
                                      OutContext);
-      Expr = ARMMCExpr::CreateUpper16(Expr, OutContext);
+      Expr = ARMMCExpr::createUpper16(Expr, OutContext);
       break;
     }
     break;
   }
 
   case ARMII::MO_PLT:
-    Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_PLT,
+    Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_PLT,
                                    OutContext);
     break;
   }
 
   if (!MO.isJTI() && MO.getOffset())
-    Expr = MCBinaryExpr::CreateAdd(Expr,
-                                   MCConstantExpr::Create(MO.getOffset(),
+    Expr = MCBinaryExpr::createAdd(Expr,
+                                   MCConstantExpr::create(MO.getOffset(),
                                                           OutContext),
                                    OutContext);
   return MCOperand::createExpr(Expr);
@@ -80,7 +80,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
     MCOp = MCOperand::createImm(MO.getImm());
     break;
   case MachineOperand::MO_MachineBasicBlock:
-    MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
+    MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
         MO.getMBB()->getSymbol(), OutContext));
     break;
   case MachineOperand::MO_GlobalAddress: {
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index e794fb71af63..0aceaed87510 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -304,10 +304,6 @@ public:
     return getTM<ARMBaseTargetMachine>();
   }
 
-  const ARMSubtarget &getARMSubtarget() const {
-    return *getARMTargetMachine().getSubtargetImpl();
-  }
-
   void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;
@@ -330,24 +326,28 @@ void ARMPassConfig::addIRPasses() {
   // Cmpxchg instructions are often used with a subsequent comparison to
   // determine whether it succeeded. We can exploit existing control-flow in
   // ldrex/strex loops to simplify this, but it needs tidying up.
-  const ARMSubtarget *Subtarget = &getARMSubtarget();
-  if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
-    if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
-      addPass(createCFGSimplificationPass());
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+    addPass(createCFGSimplificationPass(-1, [this](const Function &F) {
+      const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
+      return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
+    }));
 
   TargetPassConfig::addIRPasses();
 }
 
 bool ARMPassConfig::addPreISel() {
-  if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+  if ((TM->getOptLevel() != CodeGenOpt::None &&
        EnableGlobalMerge == cl::BOU_UNSET) ||
-      EnableGlobalMerge == cl::BOU_TRUE)
+      EnableGlobalMerge == cl::BOU_TRUE) {
     // FIXME: This is using the thumb1 only constant value for
     // maximal global offset for merging globals. We may want
     // to look into using the old value for non-thumb1 code of
     // 4095 based on the TargetMachine, but this starts to become
     // tricky when doing code gen per function.
-    addPass(createGlobalMergePass(TM, 127));
+    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
+                               (EnableGlobalMerge == cl::BOU_UNSET);
+    addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize));
+  }
 
   return false;
 }
@@ -387,10 +387,13 @@ void ARMPassConfig::addPreSched2() {
 
   if (getOptLevel() != CodeGenOpt::None) {
     // in v8, IfConversion depends on Thumb instruction widths
-    if (getARMSubtarget().restrictIT())
-      addPass(createThumb2SizeReductionPass());
-    if (!getARMSubtarget().isThumb1Only())
-      addPass(&IfConverterID);
+    addPass(createThumb2SizeReductionPass([this](const Function &F) {
+      return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
+    }));
+
+    addPass(createIfConverter([this](const Function &F) {
+      return !this->TM->getSubtarget<ARMSubtarget>(F).isThumb1Only();
+    }));
   }
   addPass(createThumb2ITBlockPass());
 }
@@ -399,8 +402,9 @@ void ARMPassConfig::addPreEmitPass() {
   addPass(createThumb2SizeReductionPass());
 
   // Constant island pass work on unbundled instructions.
-  if (getARMSubtarget().isThumb2())
-    addPass(&UnpackMachineBundlesID);
+  addPass(createUnpackMachineBundles([this](const Function &F) {
+    return this->TM->getSubtarget<ARMSubtarget>(F).isThumb2();
+  }));
 
   // Don't optimize barriers at -O0.
   if (getOptLevel() != CodeGenOpt::None)
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 80f03c62bbfb..eaed5cc68750 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -50,12 +50,12 @@ const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
 
   assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
 
-  return MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang),
+  return MCSymbolRefExpr::create(TM.getSymbol(GV, Mang),
                                  MCSymbolRefExpr::VK_ARM_TARGET2, getContext());
 }
 
 const MCExpr *ARMElfTargetObjectFile::
 getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
-  return MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO,
+  return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO,
                                  getContext());
 }
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 30c7d62e84b8..8bcbb1159f81 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -1051,7 +1051,7 @@ public:
     if (!CE) return false;
     int64_t Value = CE->getValue();
     return (ARM_AM::getSOImmVal(Value) != -1 ||
-            ARM_AM::getSOImmVal(-Value) != -1);;
+            ARM_AM::getSOImmVal(-Value) != -1);
   }
   bool isT2SOImm() const {
     if (!isImm()) return false;
@@ -4252,7 +4252,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
     Error(S, "'be' or 'le' operand expected");
     return MatchOperand_ParseFail;
   }
-  Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::Create(Val,
+  Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::create(Val,
                                                                   getContext()),
                                            S, Tok.getEndLoc()));
   return MatchOperand_Success;
@@ -4656,7 +4656,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
       Val = INT32_MIN;
 
     Operands.push_back(
-      ARMOperand::CreateImm(MCConstantExpr::Create(Val, getContext()), S, E));
+      ARMOperand::CreateImm(MCConstantExpr::create(Val, getContext()), S, E));
 
     return MatchOperand_Success;
   }
@@ -4886,7 +4886,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) {
     // If the constant was #-0, represent it as INT32_MIN.
     int32_t Val = CE->getValue();
     if (isNegative && Val == 0)
-      CE = MCConstantExpr::Create(INT32_MIN, getContext());
+      CE = MCConstantExpr::create(INT32_MIN, getContext());
 
     // Now we should have the closing ']'
     if (Parser.getTok().isNot(AsmToken::RBrac))
@@ -5073,7 +5073,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
     IntVal ^= (uint64_t)isNegative << 31;
     Parser.Lex(); // Eat the token.
     Operands.push_back(ARMOperand::CreateImm(
-          MCConstantExpr::Create(IntVal, getContext()),
+          MCConstantExpr::create(IntVal, getContext()),
           S, Parser.getTok().getLoc()));
     return MatchOperand_Success;
   }
@@ -5090,7 +5090,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
     Val = APFloat(RealVal).bitcastToAPInt().getZExtValue();
 
     Operands.push_back(ARMOperand::CreateImm(
-        MCConstantExpr::Create(Val, getContext()), S,
+        MCConstantExpr::create(Val, getContext()), S,
         Parser.getTok().getLoc()));
     return MatchOperand_Success;
   }
@@ -5179,7 +5179,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
       if (CE) {
         int32_t Val = CE->getValue();
         if (isNegative && Val == 0)
-          ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
+          ImmVal = MCConstantExpr::create(INT32_MIN, getContext());
       }
       E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
       Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
@@ -5209,7 +5209,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
     if (getParser().parseExpression(SubExprVal))
       return true;
 
-    const MCExpr *ExprVal = ARMMCExpr::Create(RefKind, SubExprVal,
+    const MCExpr *ExprVal = ARMMCExpr::create(RefKind, SubExprVal,
                                               getContext());
     E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
     Operands.push_back(ARMOperand::CreateImm(ExprVal, S, E));
@@ -5765,7 +5765,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // Add the processor imod operand, if necessary.
   if (ProcessorIMod) {
     Operands.push_back(ARMOperand::CreateImm(
-          MCConstantExpr::Create(ProcessorIMod, getContext()),
+          MCConstantExpr::create(ProcessorIMod, getContext()),
                                  NameLoc, NameLoc));
   } else if (Mnemonic == "cps" && isMClass()) {
     return Error(NameLoc, "instruction 'cps' requires effect for M-class");
@@ -6752,13 +6752,13 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
       MCSymbol *Dot = getContext().createTempSymbol();
       Out.EmitLabel(Dot);
       const MCExpr *OpExpr = Inst.getOperand(2).getExpr();
-      const MCExpr *InstPC = MCSymbolRefExpr::Create(Dot,
+      const MCExpr *InstPC = MCSymbolRefExpr::create(Dot,
                                                      MCSymbolRefExpr::VK_None,
                                                      getContext());
-      const MCExpr *Const8 = MCConstantExpr::Create(8, getContext());
-      const MCExpr *ReadPC = MCBinaryExpr::CreateAdd(InstPC, Const8,
+      const MCExpr *Const8 = MCConstantExpr::create(8, getContext());
+      const MCExpr *ReadPC = MCBinaryExpr::createAdd(InstPC, Const8,
                                                      getContext());
-      const MCExpr *FixupAddr = MCBinaryExpr::CreateAdd(ReadPC, OpExpr,
+      const MCExpr *FixupAddr = MCBinaryExpr::createAdd(ReadPC, OpExpr,
                                                         getContext());
       TmpInst.addOperand(MCOperand::createExpr(FixupAddr));
     }
@@ -9168,74 +9168,19 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
   StringRef CPU = getParser().parseStringToEndOfStatement().trim();
   getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU);
 
+  // FIXME: This is using table-gen data, but should be moved to
+  // ARMTargetParser once that is table-gen'd.
   if (!STI.isCPUStringValid(CPU)) {
     Error(L, "Unknown CPU name");
     return false;
   }
 
-  // FIXME: This switches the CPU features globally, therefore it might
-  // happen that code you would not expect to assemble will. For details
-  // see: http://llvm.org/bugs/show_bug.cgi?id=20757
   STI.InitMCProcessorInfo(CPU, "");
   STI.InitCPUSchedModel(CPU);
   setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
 
   return false;
 }
-
-// FIXME: This is duplicated in getARMFPUFeatures() in
-// tools/clang/lib/Driver/Tools.cpp
-static const struct {
-  const unsigned ID;
-  const FeatureBitset Enabled;
-  const FeatureBitset Disabled;
-} FPUs[] = {
-    {/* ID */ ARM::FK_VFP, 
-     /* Enabled */ {ARM::FeatureVFP2}, 
-     /* Disabled */ {ARM::FeatureNEON}},
-    {/* ID */ ARM::FK_VFPV2, 
-     /* Enabled */ {ARM::FeatureVFP2}, 
-     /* Disabled */ {ARM::FeatureNEON}},
-    {/* ID */ ARM::FK_VFPV3, 
-     /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3},  
-     /* Disabled */ {ARM::FeatureNEON, ARM::FeatureD16}},
-    {/* ID */ ARM::FK_VFPV3_D16, 
-     /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureD16},
-     /* Disabled */ {ARM::FeatureNEON}},
-    {/* ID */ ARM::FK_VFPV4, 
-     /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4},
-     /* Disabled */ {ARM::FeatureNEON, ARM::FeatureD16}},
-    {/* ID */ ARM::FK_VFPV4_D16, 
-     /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
-                    ARM::FeatureD16},
-     /* Disabled */ {ARM::FeatureNEON}},
-    {/* ID */ ARM::FK_FPV5_D16, 
-     /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
-                    ARM::FeatureFPARMv8, ARM::FeatureD16},
-     /* Disabled */ {ARM::FeatureNEON, ARM::FeatureCrypto}},
-    {/* ID */ ARM::FK_FP_ARMV8, 
-     /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
-                    ARM::FeatureFPARMv8},
-     /* Disabled */ {ARM::FeatureNEON, ARM::FeatureCrypto, ARM::FeatureD16}},
-    {/* ID */ ARM::FK_NEON, 
-     /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON}, 
-     /* Disabled */ {ARM::FeatureD16}},
-    {/* ID */ ARM::FK_NEON_VFPV4, 
-     /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
-                    ARM::FeatureNEON}, 
-     /* Disabled */ {ARM::FeatureD16}},
-    {/* ID */ ARM::FK_NEON_FP_ARMV8, 
-     /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
-                    ARM::FeatureFPARMv8, ARM::FeatureNEON},
-     /* Disabled */ {ARM::FeatureCrypto, ARM::FeatureD16}},
-    {/* ID */ ARM::FK_CRYPTO_NEON_FP_ARMV8,
-     /* Enabled */ {ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureVFP4,
-                    ARM::FeatureFPARMv8, ARM::FeatureNEON, 
-                    ARM::FeatureCrypto},
-     /* Disabled */ {ARM::FeatureD16}},
-    {ARM::FK_SOFTVFP, {}, {}},
-};
-
 /// parseDirectiveFPU
 ///  ::= .fpu str
 bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
@@ -9243,23 +9188,15 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
   StringRef FPU = getParser().parseStringToEndOfStatement().trim();
 
   unsigned ID = ARMTargetParser::parseFPU(FPU);
-
-  if (ID == ARM::FK_INVALID) {
+  std::vector<const char *> Features;
+  if (!ARMTargetParser::getFPUFeatures(ID, Features)) {
     Error(FPUNameLoc, "Unknown FPU name");
     return false;
   }
 
-  for (const auto &Entry : FPUs) {
-    if (Entry.ID != ID)
-      continue;
-
-    // Need to toggle features that should be on but are off and that
-    // should off but are on.
-    FeatureBitset Toggle = (Entry.Enabled & ~STI.getFeatureBits()) |
-                           (Entry.Disabled & STI.getFeatureBits());
-    setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle)));
-    break;
-  }
+  for (auto Feature : Features)
+    STI.ApplyFeatureFlag(Feature);
+  setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
 
   getTargetStreamer().emitFPU(ID);
   return false;
@@ -9804,7 +9741,7 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
   }
 
   const MCSymbolRefExpr *SRE =
-    MCSymbolRefExpr::Create(Parser.getTok().getIdentifier(),
+    MCSymbolRefExpr::create(Parser.getTok().getIdentifier(),
                             MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext());
   Lex();
 
@@ -9982,33 +9919,32 @@ extern "C" void LLVMInitializeARMAsmParser() {
 #define GET_MATCHER_IMPLEMENTATION
 #include "ARMGenAsmMatcher.inc"
 
+// FIXME: This structure should be moved inside ARMTargetParser
+// when we start to table-generate them, and we can use the ARM
+// flags below, that were generated by table-gen.
 static const struct {
-  const char *Name;
+  const ARM::ArchExtKind Kind;
   const unsigned ArchCheck;
   const FeatureBitset Features;
 } Extensions[] = {
-  { "crc", Feature_HasV8, {ARM::FeatureCRC} },
-  { "crypto",  Feature_HasV8,
+  { ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} },
+  { ARM::AEK_CRYPTO,  Feature_HasV8,
     {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} },
-  { "fp", Feature_HasV8, {ARM::FeatureFPARMv8} },
-  { "idiv", Feature_HasV7 | Feature_IsNotMClass,
+  { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} },
+  { ARM::AEK_HWDIV, Feature_HasV7 | Feature_IsNotMClass,
     {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} },
-  // FIXME: iWMMXT not supported
-  { "iwmmxt", Feature_None, {} },
-  // FIXME: iWMMXT2 not supported
-  { "iwmmxt2", Feature_None, {} },
-  // FIXME: Maverick not supported
-  { "maverick", Feature_None, {} },
-  { "mp", Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
-  // FIXME: ARMv6-m OS Extensions feature not checked
-  { "os", Feature_None, {} },
+  { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} },
+  { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
   // FIXME: Also available in ARMv6-K
-  { "sec", Feature_HasV7, {ARM::FeatureTrustZone} },
-  { "simd", Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} },
+  { ARM::AEK_SEC, Feature_HasV7, {ARM::FeatureTrustZone} },
   // FIXME: Only available in A-class, isel not predicated
-  { "virt", Feature_HasV7, {ARM::FeatureVirtualization} },
-  // FIXME: xscale not supported
-  { "xscale", Feature_None, {} },
+  { ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} },
+  // FIXME: Unsupported extensions.
+  { ARM::AEK_OS, Feature_None, {} },
+  { ARM::AEK_IWMMXT, Feature_None, {} },
+  { ARM::AEK_IWMMXT2, Feature_None, {} },
+  { ARM::AEK_MAVERICK, Feature_None, {} },
+  { ARM::AEK_XSCALE, Feature_None, {} },
 };
 
 /// parseDirectiveArchExtension
@@ -10031,9 +9967,12 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
     EnableFeature = false;
     Name = Name.substr(2);
   }
+  unsigned FeatureKind = ARMTargetParser::parseArchExt(Name);
+  if (FeatureKind == ARM::AEK_INVALID)
+    Error(ExtLoc, "unknown architectural extension: " + Name);
 
   for (const auto &Extension : Extensions) {
-    if (Extension.Name != Name)
+    if (Extension.Kind != FeatureKind)
       continue;
 
     if (Extension.Features.none())
@@ -10080,7 +10019,7 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
     if (Op.isImm()) {
       const MCExpr *SOExpr = Op.getImm();
       int64_t Value;
-      if (!SOExpr->EvaluateAsAbsolute(Value))
+      if (!SOExpr->evaluateAsAbsolute(Value))
         return Match_Success;
       assert((Value >= INT32_MIN && Value <= UINT32_MAX) &&
              "expression value must be representable in 32 bits");
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 2d36c3020016..0bff52141da5 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -329,7 +329,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
     const MCExpr *Expr = Op.getExpr();
     switch (Expr->getKind()) {
     case MCExpr::Binary:
-      O << '#' << *Expr;
+      O << '#';
+      Expr->print(O, &MAI);
       break;
     case MCExpr::Constant: {
       // If a symbolic branch target was added as a constant expression then
@@ -337,8 +338,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
       // address.
       const MCConstantExpr *Constant = cast<MCConstantExpr>(Expr);
       int64_t TargetAddress;
-      if (!Constant->EvaluateAsAbsolute(TargetAddress)) {
-        O << '#' << *Expr;
+      if (!Constant->evaluateAsAbsolute(TargetAddress)) {
+        O << '#';
+        Expr->print(O, &MAI);
       } else {
         O << "0x";
         O.write_hex(static_cast<uint32_t>(TargetAddress));
@@ -348,7 +350,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
     default:
       // FIXME: Should we always treat this as if it is a constant literal and
       // prefix it with '#'?
-      O << *Expr;
+      Expr->print(O, &MAI);
       break;
     }
   }
@@ -359,7 +361,7 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
                                                raw_ostream &O) {
   const MCOperand &MO1 = MI->getOperand(OpNum);
   if (MO1.isExpr()) {
-    O << *MO1.getExpr();
+    MO1.getExpr()->print(O, &MAI);
     return;
   }
 
@@ -1055,7 +1057,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
   const MCOperand &MO = MI->getOperand(OpNum);
 
   if (MO.isExpr()) {
-    O << *MO.getExpr();
+    MO.getExpr()->print(O, &MAI);
     return;
   }
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index f0eed9b811d4..b03cada9a641 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -622,8 +622,6 @@ namespace ARM_AM {
     return Value;
   }
 
-  AMSubMode getLoadStoreMultipleSubMode(int Opcode);
-
   //===--------------------------------------------------------------------===//
   // Floating-point Immediates
   //
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 6c1f7891f58a..be23e9070103 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -260,9 +260,9 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
         hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding;
     uint64_t NumNops = Count / 2;
     for (uint64_t i = 0; i != NumNops; ++i)
-      OW->Write16(nopEncoding);
+      OW->write16(nopEncoding);
     if (Count & 1)
-      OW->Write8(0);
+      OW->write8(0);
     return true;
   }
   // ARM mode
@@ -270,21 +270,21 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
       hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding;
   uint64_t NumNops = Count / 4;
   for (uint64_t i = 0; i != NumNops; ++i)
-    OW->Write32(nopEncoding);
+    OW->write32(nopEncoding);
   // FIXME: should this function return false when unable to write exactly
   // 'Count' bytes with NOP encodings?
   switch (Count % 4) {
   default:
     break; // No leftover bytes to write
   case 1:
-    OW->Write8(0);
+    OW->write8(0);
     break;
   case 2:
-    OW->Write16(0);
+    OW->write16(0);
     break;
   case 3:
-    OW->Write16(0);
-    OW->Write8(0xa0);
+    OW->write16(0);
+    OW->write8(0xa0);
     break;
   }
 
@@ -601,8 +601,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
   // the offset when the destination has the same MCFragment.
   if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
     const MCSymbol &Sym = A->getSymbol();
-    const MCSymbolData &SymData = Asm.getSymbolData(Sym);
-    IsResolved = (SymData.getFragment() == DF);
+    IsResolved = (Sym.getFragment() == DF);
   }
   // We must always generate a relocation for BL/BLX instructions if we have
   // a symbol to reference, as the linker relies on knowing the destination
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index f4fedeef650b..804d3534096a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -37,7 +37,7 @@ namespace {
     unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                           bool IsPCRel) const override;
 
-    bool needsRelocateWithSymbol(const MCSymbolData &SD,
+    bool needsRelocateWithSymbol(const MCSymbol &Sym,
                                  unsigned Type) const override;
   };
 }
@@ -49,7 +49,7 @@ ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
 
 ARMELFObjectWriter::~ARMELFObjectWriter() {}
 
-bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
                                                  unsigned Type) const {
   // FIXME: This is extremely conservative. This really needs to use a
   // whitelist with a clear explanation for why each realocation needs to
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 0eb5a8136e88..6e3af739eca2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -22,9 +22,7 @@
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELF.h"
 #include "llvm/MC/MCELFStreamer.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstPrinter.h"
@@ -34,7 +32,7 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ARMBuildAttributes.h"
 #include "llvm/Support/ARMEHABI.h"
@@ -216,7 +214,13 @@ ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
 }
 
 void ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
-  OS << "\t.thumb_set\t" << *Symbol << ", " << *Value << '\n';
+  const MCAsmInfo *MAI = Streamer.getContext().getAsmInfo();
+
+  OS << "\t.thumb_set\t";
+  Symbol->print(OS, MAI);
+  OS << ", ";
+  Value->print(OS, MAI);
+  OS << '\n';
 }
 
 void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) {
@@ -562,17 +566,16 @@ private:
     MCSymbol *Start = getContext().createTempSymbol();
     EmitLabel(Start);
 
-    MCSymbol *Symbol =
-      getContext().getOrCreateSymbol(Name + "." +
-                                     Twine(MappingSymbolCounter++));
+    auto *Symbol = cast<MCSymbolELF>(getContext().getOrCreateSymbol(
+        Name + "." + Twine(MappingSymbolCounter++)));
 
-    MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-    MCELF::SetType(SD, ELF::STT_NOTYPE);
-    MCELF::SetBinding(SD, ELF::STB_LOCAL);
-    SD.setExternal(false);
+    getAssembler().registerSymbol(*Symbol);
+    Symbol->setType(ELF::STT_NOTYPE);
+    Symbol->setBinding(ELF::STB_LOCAL);
+    Symbol->setExternal(false);
     AssignSection(Symbol, getCurrentSection().first);
 
-    const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
+    const MCExpr *Value = MCSymbolRefExpr::create(Start, getContext());
     Symbol->setVariableValue(Value);
   }
 
@@ -688,16 +691,16 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
   using namespace ARMBuildAttrs;
 
   setAttributeItem(CPU_name,
-                   ARMTargetParser::getArchDefaultCPUName(Arch),
+                   ARMTargetParser::getCPUAttr(Arch),
                    false);
 
   if (EmittedArch == ARM::AK_INVALID)
     setAttributeItem(CPU_arch,
-                     ARMTargetParser::getArchDefaultCPUArch(Arch),
+                     ARMTargetParser::getArchAttr(Arch),
                      false);
   else
     setAttributeItem(CPU_arch,
-                     ARMTargetParser::getArchDefaultCPUArch(EmittedArch),
+                     ARMTargetParser::getArchAttr(EmittedArch),
                      false);
 
   switch (Arch) {
@@ -813,6 +816,9 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
                      /* OverwriteExisting= */ false);
     break;
 
+  // ABI_HardFP_use is handled in ARMAsmPrinter, so _SP_D16 is treated the same
+  // as _D16 here.
+  case ARM::FK_FPV4_SP_D16:
   case ARM::FK_VFPV4_D16:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPv4B,
@@ -827,6 +833,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
 
   // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so
   // uses the FP_ARMV8_D16 build attribute.
+  case ARM::FK_FPV5_SP_D16:
   case ARM::FK_FPV5_D16:
     setAttributeItem(ARMBuildAttrs::FP_arch,
                      ARMBuildAttrs::AllowFPARMv8B,
@@ -861,6 +868,7 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
     break;
 
   case ARM::FK_SOFTVFP:
+  case ARM::FK_NONE:
     break;
 
   default:
@@ -972,9 +980,9 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
   if (!Streamer.IsThumb)
     return;
 
-  const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol);
-  unsigned Type = MCELF::GetType(SD);
-  if (Type == ELF_STT_Func || Type == ELF_STT_GnuIFunc)
+  Streamer.getAssembler().registerSymbol(*Symbol);
+  unsigned Type = cast<MCSymbolELF>(Symbol)->getType();
+  if (Type == ELF::STT_FUNC || Type == ELF::STT_GNU_IFUNC)
     Streamer.EmitThumbFunc(Symbol);
 }
 
@@ -1024,7 +1032,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
   }
 
   // Get .ARM.extab or .ARM.exidx section
-  const MCSymbol *Group = FnSection.getGroup();
+  const MCSymbolELF *Group = FnSection.getGroup();
   if (Group)
     Flags |= ELF::SHF_GROUP;
   MCSectionELF *EHSection =
@@ -1095,7 +1103,7 @@ void ARMELFStreamer::emitFnEnd() {
     EmitPersonalityFixup(GetAEABIUnwindPersonalityName(PersonalityIndex));
 
   const MCSymbolRefExpr *FnStartRef =
-    MCSymbolRefExpr::Create(FnStart,
+    MCSymbolRefExpr::create(FnStart,
                             MCSymbolRefExpr::VK_ARM_PREL31,
                             getContext());
 
@@ -1106,7 +1114,7 @@ void ARMELFStreamer::emitFnEnd() {
   } else if (ExTab) {
     // Emit a reference to the unwind opcodes in the ".ARM.extab" section.
     const MCSymbolRefExpr *ExTabEntryRef =
-      MCSymbolRefExpr::Create(ExTab,
+      MCSymbolRefExpr::create(ExTab,
                               MCSymbolRefExpr::VK_ARM_PREL31,
                               getContext());
     EmitValue(ExTabEntryRef, 4);
@@ -1138,7 +1146,7 @@ void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; }
 void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
   const MCSymbol *PersonalitySym = getContext().getOrCreateSymbol(Name);
 
-  const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create(
+  const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::create(
       PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
 
   visitUsedExpr(*PersonalityRef);
@@ -1186,7 +1194,7 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
   // Emit personality
   if (Personality) {
     const MCSymbolRefExpr *PersonalityRef =
-      MCSymbolRefExpr::Create(Personality,
+      MCSymbolRefExpr::create(Personality,
                               MCSymbolRefExpr::VK_ARM_PREL31,
                               getContext());
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index caa873622ae9..1ac08159bd3d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -19,8 +19,7 @@ using namespace llvm;
 
 void ARMMCAsmInfoDarwin::anchor() { }
 
-ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) {
-  Triple TheTriple(TT);
+ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) {
   if ((TheTriple.getArch() == Triple::armeb) ||
       (TheTriple.getArch() == Triple::thumbeb))
     IsLittleEndian = false;
@@ -41,8 +40,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) {
 
 void ARMELFMCAsmInfo::anchor() { }
 
-ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
-  Triple TheTriple(TT);
+ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) {
   if ((TheTriple.getArch() == Triple::armeb) ||
       (TheTriple.getArch() == Triple::thumbeb))
     IsLittleEndian = false;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 6cb471537f6e..99a5fff5ec27 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -19,18 +19,19 @@
 #include "llvm/MC/MCAsmInfoELF.h"
 
 namespace llvm {
+  class Triple;
 
   class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
     virtual void anchor();
 
   public:
-    explicit ARMMCAsmInfoDarwin(StringRef TT);
+    explicit ARMMCAsmInfoDarwin(const Triple &TheTriple);
   };
 
   class ARMELFMCAsmInfo : public MCAsmInfoELF {
     void anchor() override;
   public:
-    explicit ARMELFMCAsmInfo(StringRef TT);
+    explicit ARMELFMCAsmInfo(const Triple &TT);
 
     void setUseIntegratedAssembler(bool Value) override;
   };
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 5b90de327418..2063ca6bdf3b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -16,12 +16,12 @@ using namespace llvm;
 #define DEBUG_TYPE "armmcexpr"
 
 const ARMMCExpr*
-ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ARMMCExpr::create(VariantKind Kind, const MCExpr *Expr,
                        MCContext &Ctx) {
   return new (Ctx) ARMMCExpr(Kind, Expr);
 }
 
-void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
+void ARMMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
   switch (Kind) {
   default: llvm_unreachable("Invalid kind!");
   case VK_ARM_HI16: OS << ":upper16:"; break;
@@ -31,7 +31,7 @@ void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
   const MCExpr *Expr = getSubExpr();
   if (Expr->getKind() != MCExpr::SymbolRef)
     OS << '(';
-  Expr->print(OS);
+  Expr->print(OS, MAI);
   if (Expr->getKind() != MCExpr::SymbolRef)
     OS << ')';
 }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index a52abe7760d1..9146d4def75a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -33,15 +33,15 @@ public:
   /// @name Construction
   /// @{
 
-  static const ARMMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+  static const ARMMCExpr *create(VariantKind Kind, const MCExpr *Expr,
                                       MCContext &Ctx);
 
-  static const ARMMCExpr *CreateUpper16(const MCExpr *Expr, MCContext &Ctx) {
-    return Create(VK_ARM_HI16, Expr, Ctx);
+  static const ARMMCExpr *createUpper16(const MCExpr *Expr, MCContext &Ctx) {
+    return create(VK_ARM_HI16, Expr, Ctx);
   }
 
-  static const ARMMCExpr *CreateLower16(const MCExpr *Expr, MCContext &Ctx) {
-    return Create(VK_ARM_LO16, Expr, Ctx);
+  static const ARMMCExpr *createLower16(const MCExpr *Expr, MCContext &Ctx) {
+    return create(VK_ARM_LO16, Expr, Ctx);
   }
 
   /// @}
@@ -56,15 +56,15 @@ public:
 
   /// @}
 
-  void PrintImpl(raw_ostream &OS) const override;
-  bool EvaluateAsRelocatableImpl(MCValue &Res,
+  void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+  bool evaluateAsRelocatableImpl(MCValue &Res,
                                  const MCAsmLayout *Layout,
                                  const MCFixup *Fixup) const override {
     return false;
   }
   void visitUsedExpr(MCStreamer &Streamer) const override;
-  MCSection *FindAssociatedSection() const override {
-    return getSubExpr()->FindAssociatedSection();
+  MCSection *findAssociatedSection() const override {
+    return getSubExpr()->findAssociatedSection();
   }
 
   // There are no TLS ARMMCExprs at the moment.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 30deba9a08c6..92c4d6a824ea 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -277,18 +277,17 @@ static MCRegisterInfo *createARMMCRegisterInfo(StringRef Triple) {
   return X;
 }
 
-static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
-  Triple TheTriple(TT);
-
+static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
+                                     const Triple &TheTriple) {
   MCAsmInfo *MAI;
   if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO())
-    MAI = new ARMMCAsmInfoDarwin(TT);
+    MAI = new ARMMCAsmInfoDarwin(TheTriple);
   else if (TheTriple.isWindowsItaniumEnvironment())
     MAI = new ARMCOFFMCAsmInfoGNU();
   else if (TheTriple.isWindowsMSVCEnvironment())
     MAI = new ARMCOFFMCAsmInfoMicrosoft();
   else
-    MAI = new ARMELFMCAsmInfo(TT);
+    MAI = new ARMELFMCAsmInfo(TheTriple);
 
   unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true);
   MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0));
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
index d4b00e6e4fb5..4468132588cf 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
@@ -26,9 +26,9 @@ public:
                                              unsigned VariantKind) override {
     switch(VariantKind) {
     case LLVMDisassembler_VariantKind_ARM_HI16:
-      return ARMMCExpr::CreateUpper16(SubExpr, Ctx);
+      return ARMMCExpr::createUpper16(SubExpr, Ctx);
     case LLVMDisassembler_VariantKind_ARM_LO16:
-      return ARMMCExpr::CreateLower16(SubExpr, Ctx);
+      return ARMMCExpr::createLower16(SubExpr, Ctx);
     default:
       return MCRelocationInfo::createExprForCAPIVariantKind(SubExpr,
                                                             VariantKind);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 9755330bf8c3..95d7ea7c04a3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -17,7 +17,6 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/MC/MCMachOSymbolFlags.h"
 #include "llvm/MC/MCMachObjectWriter.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCValue.h"
@@ -49,12 +48,10 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
                                 const MCSymbol &S, uint64_t FixedValue);
 
 public:
-  ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
-                      uint32_t CPUSubtype)
-    : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
-                               /*UseAggressiveSymbolFolding=*/true) {}
+  ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
+      : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
 
-  void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
+  void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
                         const MCAsmLayout &Layout, const MCFragment *Fragment,
                         const MCFixup &Fixup, MCValue Target,
                         uint64_t &FixedValue) override;
@@ -152,23 +149,21 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
 
   // See <reloc.h>.
   const MCSymbol *A = &Target.getSymA()->getSymbol();
-  const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
 
-  if (!A_SD->getFragment())
+  if (!A->getFragment())
     Asm.getContext().reportFatalError(Fixup.getLoc(),
                        "symbol '" + A->getName() +
                        "' can not be undefined in a subtraction expression");
 
   uint32_t Value = Writer->getSymbolAddress(*A, Layout);
   uint32_t Value2 = 0;
-  uint64_t SecAddr =
-      Writer->getSectionAddress(A_SD->getFragment()->getParent());
+  uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
   FixedValue += SecAddr;
 
   if (const MCSymbolRefExpr *B = Target.getSymB()) {
-    const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+    const MCSymbol *SB = &B->getSymbol();
 
-    if (!B_SD->getFragment())
+    if (!SB->getFragment())
       Asm.getContext().reportFatalError(Fixup.getLoc(),
                          "symbol '" + B->getSymbol().getName() +
                          "' can not be undefined in a subtraction expression");
@@ -176,7 +171,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
     // Select the appropriate difference relocation type.
     Type = MachO::ARM_RELOC_HALF_SECTDIFF;
     Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
-    FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+    FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
   }
 
   // Relocations are written out in reverse order, so the PAIR comes first.
@@ -255,24 +250,22 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
 
   // See <reloc.h>.
   const MCSymbol *A = &Target.getSymA()->getSymbol();
-  const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
 
-  if (!A_SD->getFragment())
+  if (!A->getFragment())
     Asm.getContext().reportFatalError(Fixup.getLoc(),
                        "symbol '" + A->getName() +
                        "' can not be undefined in a subtraction expression");
 
   uint32_t Value = Writer->getSymbolAddress(*A, Layout);
-  uint64_t SecAddr =
-      Writer->getSectionAddress(A_SD->getFragment()->getParent());
+  uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent());
   FixedValue += SecAddr;
   uint32_t Value2 = 0;
 
   if (const MCSymbolRefExpr *B = Target.getSymB()) {
     assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols");
-    const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+    const MCSymbol *SB = &B->getSymbol();
 
-    if (!B_SD->getFragment())
+    if (!SB->getFragment())
       Asm.getContext().reportFatalError(Fixup.getLoc(),
                          "symbol '" + B->getSymbol().getName() +
                          "' can not be undefined in a subtraction expression");
@@ -280,7 +273,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
     // Select the appropriate difference relocation type.
     Type = MachO::ARM_RELOC_SECTDIFF;
     Value2 = Writer->getSymbolAddress(B->getSymbol(), Layout);
-    FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+    FixedValue -= Writer->getSectionAddress(SB->getFragment()->getParent());
   }
 
   // Relocations are written out in reverse order, so the PAIR comes first.
@@ -344,7 +337,7 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer,
   return false;
 }
 
-void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
+void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
                                            MCAssembler &Asm,
                                            const MCAsmLayout &Layout,
                                            const MCFragment *Fragment,
@@ -405,7 +398,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
     // Resolve constant variables.
     if (A->isVariable()) {
       int64_t Res;
-      if (A->getVariableValue()->EvaluateAsAbsolute(
+      if (A->getVariableValue()->evaluateAsAbsolute(
               Res, Layout, Writer->getSectionAddressMap())) {
         FixedValue = Res;
         return;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b62ae2e3429e..68736bc1decd 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -94,12 +94,12 @@ static void TrackDefUses(MachineInstr *MI,
 /// conservatively remove more kill flags than are necessary, but removing them
 /// is safer than incorrect kill flags remaining on instructions.
 static void ClearKillFlags(MachineInstr *MI, SmallSet<unsigned, 4> &Uses) {
-  for (MIOperands MO(MI); MO.isValid(); ++MO) {
-    if (!MO->isReg() || MO->isDef() || !MO->isKill())
+  for (MachineOperand &MO : MI->operands()) {
+    if (!MO.isReg() || MO.isDef() || !MO.isKill())
       continue;
-    if (!Uses.count(MO->getReg()))
+    if (!Uses.count(MO.getReg()))
       continue;
-    MO->setIsKill(false);
+    MO.setIsKill(false);
   }
 }
 
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 0ab1ff906c9a..d9ab824995c1 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -133,7 +133,7 @@ namespace {
   class Thumb2SizeReduce : public MachineFunctionPass {
   public:
     static char ID;
-    Thumb2SizeReduce();
+    Thumb2SizeReduce(std::function<bool(const Function &)> Ftor);
 
     const Thumb2InstrInfo *TII;
     const ARMSubtarget *STI;
@@ -198,11 +198,14 @@ namespace {
     };
 
     SmallVector<MBBInfo, 8> BlockInfo;
+
+    std::function<bool(const Function &)> PredicateFtor;
   };
   char Thumb2SizeReduce::ID = 0;
 }
 
-Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
+Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
+    : MachineFunctionPass(ID), PredicateFtor(Ftor) {
   OptimizeSize = MinimizeSize = false;
   for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
     unsigned FromOpc = ReduceTable[i].WideOpc;
@@ -1000,6 +1003,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 }
 
 bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+  if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
+    return false;
+
   STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
   if (STI->isThumb1Only() || STI->prefers32BitThumb())
     return false;
@@ -1025,6 +1031,7 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
 
 /// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
 /// reduction pass.
-FunctionPass *llvm::createThumb2SizeReductionPass() {
-  return new Thumb2SizeReduce();
+FunctionPass *llvm::createThumb2SizeReductionPass(
+    std::function<bool(const Function &)> Ftor) {
+  return new Thumb2SizeReduce(Ftor);
 }
author	Dimitry Andric <dim@FreeBSD.org>	2015-06-09 19:06:30 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2015-06-09 19:06:30 +0000
commit	85d8b2bbe386bcfe669575d05b61482d7be07e5d (patch)
tree	1dc5e75ab222a9ead44c699eceafab7a6ca7b310 /lib/Target/ARM
parent	5a5ac124e1efaf208671f01c46edb15f29ed2a0b (diff)