vendor/llvm/llvm-trunk-r304222

author: Dimitry Andric <dim@FreeBSD.org> 2017-05-30 17:37:31 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-05-30 17:37:31 +0000
commit: ee2f195dd3e40f49698ca4dc2666ec09c770e80d (patch)
tree: 66fa9a69e5789356dfe844991e64bac9222f3a35 /lib/Target
parent: ab44ce3d598882e51a25eb82eb7ae6308de85ae6 (diff)
27 files changed, 1864 insertions, 116 deletions
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 0b92249580c82..e96ee7d29b3e8 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -137,6 +137,34 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
 
 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
 
+/// Look at each instruction that references stack frames and return the stack
+/// size limit beyond which some of these instructions will require a scratch
+/// register during their expansion later.
+static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
+  // FIXME: For now, just conservatively guestimate based on unscaled indexing
+  // range. We'll end up allocating an unnecessary spill slot a lot, but
+  // realistically that's not a big deal at this stage of the game.
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      if (MI.isDebugValue() || MI.isPseudo() ||
+          MI.getOpcode() == AArch64::ADDXri ||
+          MI.getOpcode() == AArch64::ADDSXri)
+        continue;
+
+      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+        if (!MI.getOperand(i).isFI())
+          continue;
+
+        int Offset = 0;
+        if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
+            AArch64FrameOffsetCannotUpdate)
+          return 0;
+      }
+    }
+  }
+  return 255;
+}
+
 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
   if (!EnableRedZone)
     return false;
@@ -1169,16 +1197,13 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   unsigned NumRegsSpilled = SavedRegs.count();
   bool CanEliminateFrame = NumRegsSpilled == 0;
 
-  // FIXME: Set BigStack if any stack slot references may be out of range.
-  // For now, just conservatively guestimate based on unscaled indexing
-  // range. We'll end up allocating an unnecessary spill slot a lot, but
-  // realistically that's not a big deal at this stage of the game.
   // The CSR spill slots have not been allocated yet, so estimateStackSize
   // won't include them.
   MachineFrameInfo &MFI = MF.getFrameInfo();
   unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
   DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
-  bool BigStack = (CFSize >= 256);
+  unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
+  bool BigStack = (CFSize > EstimatedStackSizeLimit);
   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
     AFI->setHasStackFrame(true);
 
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 62f4c953830b2..f798010906ccf 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -381,7 +381,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
   setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
   setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
-  setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
   setOperationAction(ISD::FREM, MVT::v4f16, Expand);
   setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
   setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
@@ -413,7 +412,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
   setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
   setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
-  setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
   setOperationAction(ISD::FREM, MVT::v8f16, Expand);
   setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
   setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
@@ -726,7 +724,6 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
   if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
     setOperationAction(ISD::FSIN, VT, Expand);
     setOperationAction(ISD::FCOS, VT, Expand);
-    setOperationAction(ISD::FPOWI, VT, Expand);
     setOperationAction(ISD::FPOW, VT, Expand);
     setOperationAction(ISD::FLOG, VT, Expand);
     setOperationAction(ISD::FLOG2, VT, Expand);
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 660879426810f..0582ce95693a8 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -730,7 +730,7 @@ public:
   /// \returns True if waitcnt instruction is needed before barrier instruction,
   /// false otherwise.
   bool needWaitcntBeforeBarrier() const {
-    return getGeneration() < GFX9;
+    return true;
   }
 
   /// \returns true if the flat_scratch register should be initialized with the
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index a9d3a31a72407..48827f4639974 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -736,6 +736,9 @@ void GCNPassConfig::addMachineSSAOptimization() {
   addPass(createSIShrinkInstructionsPass());
   if (EnableSDWAPeephole) {
     addPass(&SIPeepholeSDWAID);
+    addPass(&MachineLICMID);
+    addPass(&MachineCSEID);
+    addPass(&SIFoldOperandsID);
     addPass(&DeadMachineInstructionElimID);
   }
 }
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index d63414735b95a..f13629a3185f3 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -247,9 +247,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
 
 // If the use operand doesn't care about the value, this may be an operand only
 // used for register indexing, in which case it is unsafe to fold.
-static bool isUseSafeToFold(const MachineInstr &MI,
+static bool isUseSafeToFold(const SIInstrInfo *TII,
+                            const MachineInstr &MI,
                             const MachineOperand &UseMO) {
-  return !UseMO.isUndef();
+  return !UseMO.isUndef() && !TII->isSDWA(MI);
   //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
 }
 
@@ -261,7 +262,7 @@ void SIFoldOperands::foldOperand(
   SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
   const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
 
-  if (!isUseSafeToFold(*UseMI, UseOp))
+  if (!isUseSafeToFold(TII, *UseMI, UseOp))
     return;
 
   // FIXME: Fold operands with subregs.
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 4dc090d9b7edc..fae249b04492a 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -55,6 +55,7 @@ private:
 
   std::unordered_map<MachineInstr *, std::unique_ptr<SDWAOperand>> SDWAOperands;
   std::unordered_map<MachineInstr *, SDWAOperandsVector> PotentialMatches;
+  SmallVector<MachineInstr *, 8> ConvertedInstructions;
 
   Optional<int64_t> foldToImm(const MachineOperand &Op) const;
 
@@ -69,6 +70,7 @@ public:
   void matchSDWAOperands(MachineFunction &MF);
   bool isConvertibleToSDWA(const MachineInstr &MI) const;
   bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands);
+  void legalizeScalarOperands(MachineInstr &MI) const;
 
   StringRef getPassName() const override { return "SI Peephole SDWA"; }
 
@@ -289,7 +291,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
   MachineOperand *SrcSel = TII->getNamedOperand(MI, AMDGPU::OpName::src0_sel);
   MachineOperand *SrcMods =
       TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers);
-  assert(Src && Src->isReg());
+  assert(Src && (Src->isReg() || Src->isImm()));
   if (!isSameReg(*Src, *getReplacedOperand())) {
     // If this is not src0 then it should be src1
     Src = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
@@ -580,18 +582,8 @@ void SIPeepholeSDWA::matchSDWAOperands(MachineFunction &MF) {
 }
 
 bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI) const {
-  // Check if this instruction can be converted to SDWA:
-  // 1. Does this opcode support SDWA
-  if (AMDGPU::getSDWAOp(MI.getOpcode()) == -1)
-    return false;
-
-  // 2. Are all operands - VGPRs
-  for (const MachineOperand &Operand : MI.explicit_operands()) {
-    if (!Operand.isReg() || !TRI->isVGPR(*MRI, Operand.getReg()))
-      return false;
-  }
-
-  return true;
+  // Check if this instruction has opcode that supports SDWA
+  return AMDGPU::getSDWAOp(MI.getOpcode()) != -1;
 }
 
 bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
@@ -685,7 +677,9 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     if (PotentialMatches.count(Operand->getParentInst()) == 0)
       Converted |= Operand->convertToSDWA(*SDWAInst, TII);
   }
-  if (!Converted) {
+  if (Converted) {
+    ConvertedInstructions.push_back(SDWAInst);
+  } else {
     SDWAInst->eraseFromParent();
     return false;
   }
@@ -698,6 +692,29 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
   return true;
 }
 
+// If an instruction was converted to SDWA it should not have immediates or SGPR
+// operands. Copy its scalar operands into VGPRs.
+void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI) const {
+  const MCInstrDesc &Desc = TII->get(MI.getOpcode());
+  for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
+    MachineOperand &Op = MI.getOperand(I);
+    if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
+      continue;
+    if (Desc.OpInfo[I].RegClass == -1 ||
+       !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
+      continue;
+    unsigned VGPR = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    auto Copy = BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+                        TII->get(AMDGPU::V_MOV_B32_e32), VGPR);
+    if (Op.isImm())
+      Copy.addImm(Op.getImm());
+    else if (Op.isReg())
+      Copy.addReg(Op.getReg(), Op.isKill() ? RegState::Kill : 0,
+                  Op.getSubReg());
+    Op.ChangeToRegister(VGPR, false);
+  }
+}
+
 bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
 
@@ -728,5 +745,9 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) {
 
   PotentialMatches.clear();
   SDWAOperands.clear();
+
+  while (!ConvertedInstructions.empty())
+    legalizeScalarOperands(*ConvertedInstructions.pop_back_val());
+
   return false;
 }
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 62e774d869da7..949d821e36b20 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -585,7 +585,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
-    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
@@ -603,7 +602,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
-    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
     setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
     setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
     setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
@@ -620,7 +618,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
     setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
     setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
-    setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
     setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
     setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
@@ -743,7 +740,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FSQRT,      MVT::f64, Expand);
     setOperationAction(ISD::FSIN,       MVT::f64, Expand);
     setOperationAction(ISD::FCOS,       MVT::f64, Expand);
-    setOperationAction(ISD::FPOWI,      MVT::f64, Expand);
     setOperationAction(ISD::FPOW,       MVT::f64, Expand);
     setOperationAction(ISD::FLOG,       MVT::f64, Expand);
     setOperationAction(ISD::FLOG2,      MVT::f64, Expand);
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 1dffebe97f2db..5ecf9320d5c26 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2003,7 +2003,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     // Floating point arithmetic/math functions:
     ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,     ISD::FDIV,
     ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,   ISD::FSIN,
-    ISD::FCOS,    ISD::FPOWI,   ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,
+    ISD::FCOS,    ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,
     ISD::FLOG10,  ISD::FEXP,    ISD::FEXP2,   ISD::FCEIL,   ISD::FTRUNC,
     ISD::FRINT,   ISD::FNEARBYINT,            ISD::FROUND,  ISD::FFLOOR,
     ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index d407774574be1..d855d3e7f7784 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -13,6 +13,7 @@
 #include "MCTargetDesc/MipsMCTargetDesc.h"
 #include "MipsTargetStreamer.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -216,9 +217,15 @@ class MipsAsmParser : public MCTargetAsmParser {
                                unsigned SrcReg, bool Is32BitSym, SMLoc IDLoc,
                                MCStreamer &Out, const MCSubtargetInfo *STI);
 
+  bool emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc, MCSymbol *Sym);
+
   bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
                      MCStreamer &Out, const MCSubtargetInfo *STI);
 
+  bool expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR, bool Is64FPU,
+                         SMLoc IDLoc, MCStreamer &Out,
+                         const MCSubtargetInfo *STI);
+
   bool expandLoadAddress(unsigned DstReg, unsigned BaseReg,
                          const MCOperand &Offset, bool Is32BitAddress,
                          SMLoc IDLoc, MCStreamer &Out,
@@ -1011,6 +1018,16 @@ public:
     Inst.addOperand(MCOperand::createReg(getAFGR64Reg()));
   }
 
+  void addStrictlyAFGR64AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getAFGR64Reg()));
+  }
+
+  void addStrictlyFGR64AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getFGR64Reg()));
+  }
+
   void addFGR64AsmRegOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(getFGR64Reg()));
@@ -1027,6 +1044,15 @@ public:
                     "registers");
   }
 
+  void addStrictlyFGR32AsmRegOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    Inst.addOperand(MCOperand::createReg(getFGR32Reg()));
+    // FIXME: We ought to do this for -integrated-as without -via-file-asm too.
+    if (!AsmParser.useOddSPReg() && RegIdx.Index & 1)
+      AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU "
+                                "registers");
+  }
+
   void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     Inst.addOperand(MCOperand::createReg(getFGRH32Reg()));
@@ -1574,6 +1600,11 @@ public:
     return isRegIdx() && RegIdx.Kind & RegKind_FGR && RegIdx.Index <= 31;
   }
 
+  bool isStrictlyFGRAsmReg() const {
+    // AFGR64 is $0-$15 but we handle this in getAFGR64()
+    return isRegIdx() && RegIdx.Kind == RegKind_FGR && RegIdx.Index <= 31;
+  }
+
   bool isHWRegsAsmReg() const {
     return isRegIdx() && RegIdx.Kind & RegKind_HWRegs && RegIdx.Index <= 31;
   }
@@ -2368,6 +2399,27 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
   case Mips::PseudoTRUNC_W_D:
     return expandTrunc(Inst, true, true, IDLoc, Out, STI) ? MER_Fail
                                                           : MER_Success;
+
+  case Mips::LoadImmSingleGPR:
+    return expandLoadImmReal(Inst, true, true, false, IDLoc, Out, STI)
+               ? MER_Fail
+               : MER_Success;
+  case Mips::LoadImmSingleFGR:
+    return expandLoadImmReal(Inst, true, false, false, IDLoc, Out, STI)
+               ? MER_Fail
+               : MER_Success;
+  case Mips::LoadImmDoubleGPR:
+    return expandLoadImmReal(Inst, false, true, false, IDLoc, Out, STI)
+               ? MER_Fail
+               : MER_Success;
+  case Mips::LoadImmDoubleFGR:
+      return expandLoadImmReal(Inst, false, false, true, IDLoc, Out, STI)
+               ? MER_Fail
+               : MER_Success;
+  case Mips::LoadImmDoubleFGR_32:
+    return expandLoadImmReal(Inst, false, false, false, IDLoc, Out, STI)
+               ? MER_Fail
+               : MER_Success;
   case Mips::Ulh:
     return expandUlh(Inst, true, IDLoc, Out, STI) ? MER_Fail : MER_Success;
   case Mips::Ulhu:
@@ -2952,6 +3004,302 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
   return false;
 }
 
+// Each double-precision register DO-D15 overlaps with two of the single
+// precision registers F0-F31. As an example, all of the following hold true:
+// D0 + 1 == F1, F1 + 1 == D1, F1 + 1 == F2, depending on the context.
+static unsigned nextReg(unsigned Reg) {
+  if (MipsMCRegisterClasses[Mips::FGR32RegClassID].contains(Reg))
+    return Reg == (unsigned)Mips::F31 ? (unsigned)Mips::F0 : Reg + 1;
+  switch (Reg) {
+  default: llvm_unreachable("Unknown register in assembly macro expansion!");
+  case Mips::ZERO: return Mips::AT;
+  case Mips::AT:   return Mips::V0;
+  case Mips::V0:   return Mips::V1;
+  case Mips::V1:   return Mips::A0;
+  case Mips::A0:   return Mips::A1;
+  case Mips::A1:   return Mips::A2;
+  case Mips::A2:   return Mips::A3;
+  case Mips::A3:   return Mips::T0;
+  case Mips::T0:   return Mips::T1;
+  case Mips::T1:   return Mips::T2;
+  case Mips::T2:   return Mips::T3;
+  case Mips::T3:   return Mips::T4;
+  case Mips::T4:   return Mips::T5;
+  case Mips::T5:   return Mips::T6;
+  case Mips::T6:   return Mips::T7;
+  case Mips::T7:   return Mips::S0;
+  case Mips::S0:   return Mips::S1;
+  case Mips::S1:   return Mips::S2;
+  case Mips::S2:   return Mips::S3;
+  case Mips::S3:   return Mips::S4;
+  case Mips::S4:   return Mips::S5;
+  case Mips::S5:   return Mips::S6;
+  case Mips::S6:   return Mips::S7;
+  case Mips::S7:   return Mips::T8;
+  case Mips::T8:   return Mips::T9;
+  case Mips::T9:   return Mips::K0;
+  case Mips::K0:   return Mips::K1;
+  case Mips::K1:   return Mips::GP;
+  case Mips::GP:   return Mips::SP;
+  case Mips::SP:   return Mips::FP;
+  case Mips::FP:   return Mips::RA;
+  case Mips::RA:   return Mips::ZERO;
+  case Mips::D0:   return Mips::F1;
+  case Mips::D1:   return Mips::F3;
+  case Mips::D2:   return Mips::F5;
+  case Mips::D3:   return Mips::F7;
+  case Mips::D4:   return Mips::F9;
+  case Mips::D5:   return Mips::F11;
+  case Mips::D6:   return Mips::F13;
+  case Mips::D7:   return Mips::F15;
+  case Mips::D8:   return Mips::F17;
+  case Mips::D9:   return Mips::F19;
+  case Mips::D10:   return Mips::F21;
+  case Mips::D11:   return Mips::F23;
+  case Mips::D12:   return Mips::F25;
+  case Mips::D13:   return Mips::F27;
+  case Mips::D14:   return Mips::F29;
+  case Mips::D15:   return Mips::F31;
+  }
+}
+
+// FIXME: This method is too general. In principle we should compute the number
+// of instructions required to synthesize the immediate inline compared to
+// synthesizing the address inline and relying on non .text sections.
+// For static O32 and N32 this may yield a small benefit, for static N64 this is
+// likely to yield a much larger benefit as we have to synthesize a 64bit
+// address to load a 64 bit value.
+bool MipsAsmParser::emitPartialAddress(MipsTargetStreamer &TOut, SMLoc IDLoc,
+                                       MCSymbol *Sym) {
+  unsigned ATReg = getATReg(IDLoc);
+  if (!ATReg)
+    return true;
+
+  if(IsPicEnabled) {
+    const MCExpr *GotSym =
+        MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+    const MipsMCExpr *GotExpr =
+        MipsMCExpr::create(MipsMCExpr::MEK_GOT, GotSym, getContext());
+
+    if(isABI_O32() || isABI_N32()) {
+      TOut.emitRRX(Mips::LW, ATReg, Mips::GP, MCOperand::createExpr(GotExpr),
+                   IDLoc, STI);
+    } else { //isABI_N64()
+      TOut.emitRRX(Mips::LD, ATReg, Mips::GP, MCOperand::createExpr(GotExpr),
+                   IDLoc, STI);
+    }
+  } else { //!IsPicEnabled
+    const MCExpr *HiSym =
+        MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+    const MipsMCExpr *HiExpr =
+        MipsMCExpr::create(MipsMCExpr::MEK_HI, HiSym, getContext());
+
+    // FIXME: This is technically correct but gives a different result to gas,
+    // but gas is incomplete there (it has a fixme noting it doesn't work with
+    // 64-bit addresses).
+    // FIXME: With -msym32 option, the address expansion for N64 should probably
+    // use the O32 / N32 case. It's safe to use the 64 address expansion as the
+    // symbol's value is considered sign extended.
+    if(isABI_O32() || isABI_N32()) {
+      TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc, STI);
+    } else { //isABI_N64()
+      const MCExpr *HighestSym =
+          MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+      const MipsMCExpr *HighestExpr =
+          MipsMCExpr::create(MipsMCExpr::MEK_HIGHEST, HighestSym, getContext());
+      const MCExpr *HigherSym =
+          MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+      const MipsMCExpr *HigherExpr =
+          MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, HigherSym, getContext());
+
+      TOut.emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HighestExpr), IDLoc,
+                  STI);
+      TOut.emitRRX(Mips::DADDiu, ATReg, ATReg,
+                   MCOperand::createExpr(HigherExpr), IDLoc, STI);
+      TOut.emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, STI);
+      TOut.emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HiExpr),
+                   IDLoc, STI);
+      TOut.emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, STI);
+    }
+  }
+  return false;
+}
+
+bool MipsAsmParser::expandLoadImmReal(MCInst &Inst, bool IsSingle, bool IsGPR,
+                                      bool Is64FPU, SMLoc IDLoc,
+                                      MCStreamer &Out,
+                                      const MCSubtargetInfo *STI) {
+  MipsTargetStreamer &TOut = getTargetStreamer();
+  assert(Inst.getNumOperands() == 2 && "Invalid operand count");
+  assert(Inst.getOperand(0).isReg() && Inst.getOperand(1).isImm() &&
+         "Invalid instruction operand.");
+
+  unsigned FirstReg = Inst.getOperand(0).getReg();
+  uint64_t ImmOp64 = Inst.getOperand(1).getImm();
+
+  uint32_t HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32;
+  // If ImmOp64 is AsmToken::Integer type (all bits set to zero in the
+  // exponent field), convert it to double (e.g. 1 to 1.0)
+  if ((HiImmOp64 & 0x7ff00000) == 0) {
+    APFloat RealVal(APFloat::IEEEdouble(), ImmOp64);
+    ImmOp64 = RealVal.bitcastToAPInt().getZExtValue();
+  }
+
+  uint32_t LoImmOp64 = ImmOp64 & 0xffffffff;
+  HiImmOp64 = (ImmOp64 & 0xffffffff00000000) >> 32;
+
+  if (IsSingle) {
+    // Conversion of a double in an uint64_t to a float in a uint32_t,
+    // retaining the bit pattern of a float.
+    uint32_t ImmOp32;
+    double doubleImm = BitsToDouble(ImmOp64);
+    float tmp_float = static_cast<float>(doubleImm);
+    ImmOp32 = FloatToBits(tmp_float);
+
+    if (IsGPR) {
+      if (loadImmediate(ImmOp32, FirstReg, Mips::NoRegister, true, true, IDLoc,
+                        Out, STI))
+        return true;
+      return false;
+    } else {
+      unsigned ATReg = getATReg(IDLoc);
+      if (!ATReg)
+        return true;
+      if (LoImmOp64 == 0) {
+        if (loadImmediate(ImmOp32, ATReg, Mips::NoRegister, true, true, IDLoc,
+                          Out, STI))
+          return true;
+        TOut.emitRR(Mips::MTC1, FirstReg, ATReg, IDLoc, STI);
+        return false;
+      }
+
+      MCSection *CS = getStreamer().getCurrentSectionOnly();
+      // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections
+      // where appropriate.
+      MCSection *ReadOnlySection = getContext().getELFSection(
+          ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+      MCSymbol *Sym = getContext().createTempSymbol();
+      const MCExpr *LoSym =
+          MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+      const MipsMCExpr *LoExpr =
+          MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+
+      getStreamer().SwitchSection(ReadOnlySection);
+      getStreamer().EmitLabel(Sym, IDLoc);
+      getStreamer().EmitIntValue(ImmOp32, 4);
+      getStreamer().SwitchSection(CS);
+
+      if(emitPartialAddress(TOut, IDLoc, Sym))
+        return true;
+      TOut.emitRRX(Mips::LWC1, FirstReg, ATReg,
+                   MCOperand::createExpr(LoExpr), IDLoc, STI);
+    }
+    return false;
+  }
+
+  // if(!IsSingle)
+  unsigned ATReg = getATReg(IDLoc);
+  if (!ATReg)
+    return true;
+
+  if (IsGPR) {
+    if (LoImmOp64 == 0) {
+      if(isABI_N32() || isABI_N64()) {
+        if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, false, true,
+                          IDLoc, Out, STI))
+          return true;
+        return false;
+      } else {
+        if (loadImmediate(HiImmOp64, FirstReg, Mips::NoRegister, true, true,
+                        IDLoc, Out, STI))
+          return true;
+
+        if (loadImmediate(0, nextReg(FirstReg), Mips::NoRegister, true, true,
+                        IDLoc, Out, STI))
+          return true;
+        return false;
+      }
+    }
+
+    MCSection *CS = getStreamer().getCurrentSectionOnly();
+    MCSection *ReadOnlySection = getContext().getELFSection(
+        ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+    MCSymbol *Sym = getContext().createTempSymbol();
+    const MCExpr *LoSym =
+        MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+    const MipsMCExpr *LoExpr =
+        MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+
+    getStreamer().SwitchSection(ReadOnlySection);
+    getStreamer().EmitLabel(Sym, IDLoc);
+    getStreamer().EmitIntValue(HiImmOp64, 4);
+    getStreamer().EmitIntValue(LoImmOp64, 4);
+    getStreamer().SwitchSection(CS);
+
+    if(emitPartialAddress(TOut, IDLoc, Sym))
+      return true;
+    if(isABI_N64())
+      TOut.emitRRX(Mips::DADDiu, ATReg, ATReg,
+                   MCOperand::createExpr(LoExpr), IDLoc, STI);
+    else
+      TOut.emitRRX(Mips::ADDiu, ATReg, ATReg,
+                   MCOperand::createExpr(LoExpr), IDLoc, STI);
+
+    if(isABI_N32() || isABI_N64())
+      TOut.emitRRI(Mips::LD, FirstReg, ATReg, 0, IDLoc, STI);
+    else {
+      TOut.emitRRI(Mips::LW, FirstReg, ATReg, 0, IDLoc, STI);
+      TOut.emitRRI(Mips::LW, nextReg(FirstReg), ATReg, 4, IDLoc, STI);
+    }
+    return false;
+  } else { // if(!IsGPR && !IsSingle)
+    if ((LoImmOp64 == 0) &&
+        !((HiImmOp64 & 0xffff0000) && (HiImmOp64 & 0x0000ffff))) {
+      // FIXME: In the case where the constant is zero, we can load the
+      // register directly from the zero register.
+      if (loadImmediate(HiImmOp64, ATReg, Mips::NoRegister, true, true, IDLoc,
+                        Out, STI))
+        return true;
+      if (isABI_N32() || isABI_N64())
+        TOut.emitRR(Mips::DMTC1, FirstReg, ATReg, IDLoc, STI);
+      else if (hasMips32r2()) {
+        TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI);
+        TOut.emitRRR(Mips::MTHC1_D32, FirstReg, FirstReg, ATReg, IDLoc, STI);
+      } else {
+        TOut.emitRR(Mips::MTC1, nextReg(FirstReg), ATReg, IDLoc, STI);
+        TOut.emitRR(Mips::MTC1, FirstReg, Mips::ZERO, IDLoc, STI);
+      }
+      return false;
+    }
+
+    MCSection *CS = getStreamer().getCurrentSectionOnly();
+    // FIXME: Enhance this expansion to use the .lit4 & .lit8 sections
+    // where appropriate.
+    MCSection *ReadOnlySection = getContext().getELFSection(
+        ".rodata", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+    MCSymbol *Sym = getContext().createTempSymbol();
+    const MCExpr *LoSym =
+        MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+    const MipsMCExpr *LoExpr =
+        MipsMCExpr::create(MipsMCExpr::MEK_LO, LoSym, getContext());
+
+    getStreamer().SwitchSection(ReadOnlySection);
+    getStreamer().EmitLabel(Sym, IDLoc);
+    getStreamer().EmitIntValue(HiImmOp64, 4);
+    getStreamer().EmitIntValue(LoImmOp64, 4);
+    getStreamer().SwitchSection(CS);
+
+    if(emitPartialAddress(TOut, IDLoc, Sym))
+      return true;
+    TOut.emitRRX(Is64FPU ? Mips::LDC164 : Mips::LDC1, FirstReg, ATReg,
+                 MCOperand::createExpr(LoExpr), IDLoc, STI);
+  }
+  return false;
+}
+
 bool MipsAsmParser::expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc,
                                                MCStreamer &Out,
                                                const MCSubtargetInfo *STI) {
@@ -4318,45 +4666,6 @@ bool MipsAsmParser::expandDMULMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
   return false;
 }
 
-static unsigned nextReg(unsigned Reg) {
-  switch (Reg) {
-  case Mips::ZERO: return Mips::AT;
-  case Mips::AT:   return Mips::V0;
-  case Mips::V0:   return Mips::V1;
-  case Mips::V1:   return Mips::A0;
-  case Mips::A0:   return Mips::A1;
-  case Mips::A1:   return Mips::A2;
-  case Mips::A2:   return Mips::A3;
-  case Mips::A3:   return Mips::T0;
-  case Mips::T0:   return Mips::T1;
-  case Mips::T1:   return Mips::T2;
-  case Mips::T2:   return Mips::T3;
-  case Mips::T3:   return Mips::T4;
-  case Mips::T4:   return Mips::T5;
-  case Mips::T5:   return Mips::T6;
-  case Mips::T6:   return Mips::T7;
-  case Mips::T7:   return Mips::S0;
-  case Mips::S0:   return Mips::S1;
-  case Mips::S1:   return Mips::S2;
-  case Mips::S2:   return Mips::S3;
-  case Mips::S3:   return Mips::S4;
-  case Mips::S4:   return Mips::S5;
-  case Mips::S5:   return Mips::S6;
-  case Mips::S6:   return Mips::S7;
-  case Mips::S7:   return Mips::T8;
-  case Mips::T8:   return Mips::T9;
-  case Mips::T9:   return Mips::K0;
-  case Mips::K0:   return Mips::K1;
-  case Mips::K1:   return Mips::GP;
-  case Mips::GP:   return Mips::SP;
-  case Mips::SP:   return Mips::FP;
-  case Mips::FP:   return Mips::RA;
-  case Mips::RA:   return Mips::ZERO;
-  default:         return 0;
-  }
-
-}
-
 // Expand 'ld $<reg> offset($reg2)' to 'lw $<reg>, offset($reg2);
 //                                      lw $<reg+1>>, offset+4($reg2)'
 // or expand 'sd $<reg> offset($reg2)' to 'sw $<reg>, offset($reg2);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 8fe4e75f3e18b..760630c411768 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -362,7 +362,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
   setOperationAction(ISD::FCOS,              MVT::f64,   Expand);
   setOperationAction(ISD::FSINCOS,           MVT::f32,   Expand);
   setOperationAction(ISD::FSINCOS,           MVT::f64,   Expand);
-  setOperationAction(ISD::FPOWI,             MVT::f32,   Expand);
   setOperationAction(ISD::FPOW,              MVT::f32,   Expand);
   setOperationAction(ISD::FPOW,              MVT::f64,   Expand);
   setOperationAction(ISD::FLOG,              MVT::f32,   Expand);
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index df42d56d041bc..d81a769d7fd97 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -681,6 +681,29 @@ def PseudoTRUNC_W_D : MipsAsmPseudoInst<(outs FGR32Opnd:$fd),
                                         "trunc.w.d\t$fd, $fs, $rs">,
                       FGR_64, HARDFLOAT;
 
+def LoadImmSingleGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+                                         (ins imm64:$fpimm),
+                                         "li.s\t$rd, $fpimm">;
+
+def LoadImmSingleFGR : MipsAsmPseudoInst<(outs StrictlyFGR32Opnd:$rd),
+                                         (ins imm64:$fpimm),
+                                         "li.s\t$rd, $fpimm">,
+                       HARDFLOAT;
+
+def LoadImmDoubleGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+                                         (ins imm64:$fpimm),
+                                         "li.d\t$rd, $fpimm">;
+
+def LoadImmDoubleFGR_32 : MipsAsmPseudoInst<(outs StrictlyAFGR64Opnd:$rd),
+                                            (ins imm64:$fpimm),
+                                            "li.d\t$rd, $fpimm">,
+                          FGR_32, HARDFLOAT;
+
+def LoadImmDoubleFGR : MipsAsmPseudoInst<(outs StrictlyFGR64Opnd:$rd),
+                                         (ins imm64:$fpimm),
+                                         "li.d\t$rd, $fpimm">,
+                       FGR_64, HARDFLOAT;
+
 //===----------------------------------------------------------------------===//
 // InstAliases.
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index ccfdcc89b078a..08fb3d7d43525 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -552,16 +552,31 @@ def AFGR64AsmOperand : MipsAsmRegOperand {
   let PredicateMethod = "isFGRAsmReg";
 }
 
+def StrictlyAFGR64AsmOperand : MipsAsmRegOperand {
+  let Name = "StrictlyAFGR64AsmReg";
+  let PredicateMethod = "isStrictlyFGRAsmReg";
+}
+
 def FGR64AsmOperand : MipsAsmRegOperand {
   let Name = "FGR64AsmReg";
   let PredicateMethod = "isFGRAsmReg";
 }
 
+def StrictlyFGR64AsmOperand : MipsAsmRegOperand {
+  let Name = "StrictlyFGR64AsmReg";
+  let PredicateMethod = "isStrictlyFGRAsmReg";
+}
+
 def FGR32AsmOperand : MipsAsmRegOperand {
   let Name = "FGR32AsmReg";
   let PredicateMethod = "isFGRAsmReg";
 }
 
+def StrictlyFGR32AsmOperand : MipsAsmRegOperand {
+  let Name = "StrictlyFGR32AsmReg";
+  let PredicateMethod = "isStrictlyFGRAsmReg";
+}
+
 def FGRH32AsmOperand : MipsAsmRegOperand {
   let Name = "FGRH32AsmReg";
   let PredicateMethod = "isFGRAsmReg";
@@ -639,14 +654,26 @@ def AFGR64Opnd : RegisterOperand<AFGR64> {
   let ParserMatchClass = AFGR64AsmOperand;
 }
 
+def StrictlyAFGR64Opnd : RegisterOperand<AFGR64> {
+  let ParserMatchClass = StrictlyAFGR64AsmOperand;
+}
+
 def FGR64Opnd : RegisterOperand<FGR64> {
   let ParserMatchClass = FGR64AsmOperand;
 }
 
+def StrictlyFGR64Opnd : RegisterOperand<FGR64> {
+  let ParserMatchClass = StrictlyFGR64AsmOperand;
+}
+
 def FGR32Opnd : RegisterOperand<FGR32> {
   let ParserMatchClass = FGR32AsmOperand;
 }
 
+def StrictlyFGR32Opnd : RegisterOperand<FGR32> {
+  let ParserMatchClass = StrictlyFGR32AsmOperand;
+}
+
 def FGRCCOpnd : RegisterOperand<FGRCC> {
   // The assembler doesn't use register classes so we can re-use
   // FGR32AsmOperand.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index b90a5ee28342f..216efcc4a1ee8 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -539,7 +539,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::FSIN, VT, Expand);
       setOperationAction(ISD::FCOS, VT, Expand);
       setOperationAction(ISD::FABS, VT, Expand);
-      setOperationAction(ISD::FPOWI, VT, Expand);
       setOperationAction(ISD::FFLOOR, VT, Expand);
       setOperationAction(ISD::FCEIL,  VT, Expand);
       setOperationAction(ISD::FTRUNC, VT, Expand);
@@ -798,7 +797,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::FABS , MVT::v4f64, Legal);
     setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
     setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
-    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
     setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
     setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
     setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
@@ -844,7 +842,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     setOperationAction(ISD::FABS , MVT::v4f32, Legal);
     setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
     setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
-    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
     setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
     setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
     setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
index 6bdfd4d07edce..c5f324418da51 100644
--- a/lib/Target/SystemZ/SystemZ.td
+++ b/lib/Target/SystemZ/SystemZ.td
@@ -54,6 +54,8 @@ include "SystemZInstrFormats.td"
 include "SystemZInstrInfo.td"
 include "SystemZInstrVector.td"
 include "SystemZInstrFP.td"
+include "SystemZInstrHFP.td"
+include "SystemZInstrDFP.td"
 
 def SystemZInstrInfo : InstrInfo {}
 
diff --git a/lib/Target/SystemZ/SystemZFeatures.td b/lib/Target/SystemZ/SystemZFeatures.td
index 7bfa378aa85c2..ffb0b8d1c861c 100644
--- a/lib/Target/SystemZ/SystemZFeatures.td
+++ b/lib/Target/SystemZ/SystemZFeatures.td
@@ -115,12 +115,18 @@ def FeatureTransactionalExecution : SystemZFeature<
   "Assume that the transactional-execution facility is installed"
 >;
 
+def FeatureDFPZonedConversion : SystemZFeature<
+  "dfp-zoned-conversion", "DFPZonedConversion",
+  "Assume that the DFP zoned-conversion facility is installed"
+>;
+
 def Arch10NewFeatures : SystemZFeatureList<[
     FeatureExecutionHint,
     FeatureLoadAndTrap,
     FeatureMiscellaneousExtensions,
     FeatureProcessorAssist,
-    FeatureTransactionalExecution
+    FeatureTransactionalExecution,
+    FeatureDFPZonedConversion
 ]>;
 
 //===----------------------------------------------------------------------===//
@@ -144,6 +150,11 @@ def FeatureMessageSecurityAssist5 : SystemZFeature<
   "Assume that the message-security-assist extension facility 5 is installed"
 >;
 
+def FeatureDFPPackedConversion : SystemZFeature<
+  "dfp-packed-conversion", "DFPPackedConversion",
+  "Assume that the DFP packed-conversion facility is installed"
+>;
+
 def FeatureVector : SystemZFeature<
   "vector", "Vector",
   "Assume that the vectory facility is installed"
@@ -154,6 +165,7 @@ def Arch11NewFeatures : SystemZFeatureList<[
     FeatureLoadAndZeroRightmostByte,
     FeatureLoadStoreOnCond2,
     FeatureMessageSecurityAssist5,
+    FeatureDFPPackedConversion,
     FeatureVector
 ]>;
 
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 235e095f00100..ae141dbcad34a 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4189,12 +4189,20 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
   if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
     return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
 
+  // If all elements are loads, use VLREP/VLEs (below).
+  bool AllLoads = true;
+  for (auto Elem : Elems)
+    if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
+      AllLoads = false;
+      break;
+    }
+
   // The best way of building a v2i64 from two i64s is to use VLVGP.
-  if (VT == MVT::v2i64)
+  if (VT == MVT::v2i64 && !AllLoads)
     return joinDwords(DAG, DL, Elems[0], Elems[1]);
 
   // Use a 64-bit merge high to combine two doubles.
-  if (VT == MVT::v2f64)
+  if (VT == MVT::v2f64 && !AllLoads)
     return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
 
   // Build v4f32 values directly from the FPRs:
@@ -4204,7 +4212,7 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
   //      <ABxx>         <CDxx>
   //                V                 VMRHG
   //              <ABCD>
-  if (VT == MVT::v4f32) {
+  if (VT == MVT::v4f32 && !AllLoads) {
     SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
     SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
     // Avoid unnecessary undefs by reusing the other operand.
@@ -4246,23 +4254,37 @@ static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
         Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
     Result = DAG.getBuildVector(VT, DL, Constants);
   } else {
-    // Otherwise try to use VLVGP to start the sequence in order to
+    // Otherwise try to use VLREP or VLVGP to start the sequence in order to
     // avoid a false dependency on any previous contents of the vector
-    // register.  This only makes sense if one of the associated elements
-    // is defined.
-    unsigned I1 = NumElements / 2 - 1;
-    unsigned I2 = NumElements - 1;
-    bool Def1 = !Elems[I1].isUndef();
-    bool Def2 = !Elems[I2].isUndef();
-    if (Def1 || Def2) {
-      SDValue Elem1 = Elems[Def1 ? I1 : I2];
-      SDValue Elem2 = Elems[Def2 ? I2 : I1];
-      Result = DAG.getNode(ISD::BITCAST, DL, VT,
-                           joinDwords(DAG, DL, Elem1, Elem2));
-      Done[I1] = true;
-      Done[I2] = true;
-    } else
-      Result = DAG.getUNDEF(VT);
+    // register.
+
+    // Use a VLREP if at least one element is a load.
+    unsigned LoadElIdx = UINT_MAX;
+    for (unsigned I = 0; I < NumElements; ++I)
+      if (Elems[I].getOpcode() == ISD::LOAD &&
+          cast<LoadSDNode>(Elems[I])->isUnindexed()) {
+        LoadElIdx = I;
+        break;
+      }
+    if (LoadElIdx != UINT_MAX) {
+      Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, Elems[LoadElIdx]);
+      Done[LoadElIdx] = true;
+    } else {
+      // Try to use VLVGP.
+      unsigned I1 = NumElements / 2 - 1;
+      unsigned I2 = NumElements - 1;
+      bool Def1 = !Elems[I1].isUndef();
+      bool Def2 = !Elems[I2].isUndef();
+      if (Def1 || Def2) {
+        SDValue Elem1 = Elems[Def1 ? I1 : I2];
+        SDValue Elem2 = Elems[Def2 ? I2 : I1];
+        Result = DAG.getNode(ISD::BITCAST, DL, VT,
+                             joinDwords(DAG, DL, Elem1, Elem2));
+        Done[I1] = true;
+        Done[I2] = true;
+      } else
+        Result = DAG.getUNDEF(VT);
+    }
   }
 
   // Use VLVGx to insert the other elements.
diff --git a/lib/Target/SystemZ/SystemZInstrDFP.td b/lib/Target/SystemZ/SystemZInstrDFP.td
new file mode 100644
index 0000000000000..08ab2d7bbc523
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrDFP.td
@@ -0,0 +1,231 @@
+//==- SystemZInstrDFP.td - Floating-point SystemZ instructions -*- tblgen-*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The instructions in this file implement SystemZ decimal floating-point
+// arithmetic.  These instructions are inot currently used for code generation,
+// are provided for use with the assembler and disassembler only.  If LLVM
+// ever supports decimal floating-point types (_Decimal64 etc.), they can
+// also be used for code generation for those types.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and test.
+let Defs = [CC] in {
+  def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64,  FP64>;
+  def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Convert floating-point values to narrower representations.  The destination
+// of LDXTR is a 128-bit value, but only the first register of the pair is used.
+def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32,  FP64>;
+def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>;
+
+// Extend floating-point values to wider representations.
+def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64,  FP32>;
+def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>;
+
+// Convert a signed integer value to a floating-point one.
+def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64,  GR64>;
+def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>;
+let Predicates = [FeatureFPExtension] in {
+  def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64,  GR64>;
+  def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>;
+  def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64,  GR32>;
+  def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>;
+}
+
+// Convert an unsigned integer value to a floating-point one.
+let Predicates = [FeatureFPExtension] in {
+  def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64,  GR64>;
+  def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>;
+  def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64,  GR32>;
+  def CXLFTR : TernaryRRFe<"cxlftr", 0xB95B, FP128, GR32>;
+}
+
+// Convert a floating-point value to a signed integer value.
+let Defs = [CC] in {
+  def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>;
+  def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>;
+  let Predicates = [FeatureFPExtension] in {
+    def CGDTRA : TernaryRRFe<"cgdtra", 0xB3E1, GR64, FP64>;
+    def CGXTRA : TernaryRRFe<"cgxtra", 0xB3E9, GR64, FP128>;
+    def CFDTR : TernaryRRFe<"cfdtr", 0xB941, GR32, FP64>;
+    def CFXTR : TernaryRRFe<"cfxtr", 0xB949, GR32, FP128>;
+  }
+}
+
+// Convert a floating-point value to an unsigned integer value.
+let Defs = [CC] in {
+  let Predicates = [FeatureFPExtension] in {
+    def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>;
+    def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>;
+    def CLFDTR : TernaryRRFe<"clfdtr", 0xB943, GR32, FP64>;
+    def CLFXTR : TernaryRRFe<"clfxtr", 0xB94B, GR32, FP128>;
+  }
+}
+
+// Convert a packed value to a floating-point one.
+def CDSTR : UnaryRRE<"cdstr", 0xB3F3, null_frag, FP64,  GR64>;
+def CXSTR : UnaryRRE<"cxstr", 0xB3FB, null_frag, FP128, GR128>;
+def CDUTR : UnaryRRE<"cdutr", 0xB3F2, null_frag, FP64,  GR64>;
+def CXUTR : UnaryRRE<"cxutr", 0xB3FA, null_frag, FP128, GR128>;
+
+// Convert a floating-point value to a packed value.
+def CSDTR : BinaryRRFd<"csdtr", 0xB3E3, GR64,  FP64>;
+def CSXTR : BinaryRRFd<"csxtr", 0xB3EB, GR128, FP128>;
+def CUDTR : UnaryRRE<"cudtr", 0xB3E2, null_frag, GR64,  FP64>;
+def CUXTR : UnaryRRE<"cuxtr", 0xB3EA, null_frag, GR128, FP128>;
+
+// Convert from/to memory values in the zoned format.
+let Predicates = [FeatureDFPZonedConversion] in {
+  def CDZT : BinaryRSL<"cdzt", 0xEDAA, FP64>;
+  def CXZT : BinaryRSL<"cxzt", 0xEDAB, FP128>;
+  def CZDT : StoreBinaryRSL<"czdt", 0xEDA8, FP64>;
+  def CZXT : StoreBinaryRSL<"czxt", 0xEDA9, FP128>;
+}
+
+// Convert from/to memory values in the packed format.
+let Predicates = [FeatureDFPPackedConversion] in {
+  def CDPT : BinaryRSL<"cdpt", 0xEDAE, FP64>;
+  def CXPT : BinaryRSL<"cxpt", 0xEDAF, FP128>;
+  def CPDT : StoreBinaryRSL<"cpdt", 0xEDAC, FP64>;
+  def CPXT : StoreBinaryRSL<"cpxt", 0xEDAD, FP128>;
+}
+
+// Perform floating-point operation.
+let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in
+  def PFPO : SideEffectInherentE<"pfpo", 0x010A>;
+
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Round to an integer, with the second operand (M3) specifying the rounding
+// mode.  M4 can be set to 4 to suppress detection of inexact conditions.
+def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64,  FP64>;
+def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>;
+
+// Extract biased exponent.
+def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64,  FP64>;
+def EEXTR : UnaryRRE<"eextr", 0xB3ED, null_frag, FP128, FP128>;
+
+// Extract significance.
+def ESDTR : UnaryRRE<"esdtr", 0xB3E7, null_frag, FP64,  FP64>;
+def ESXTR : UnaryRRE<"esxtr", 0xB3EF, null_frag, FP128, FP128>;
+
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [CC] in {
+  let isCommutable = 1 in {
+    def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64,  FP64,  FP64>;
+    def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>;
+  }
+  let Predicates = [FeatureFPExtension] in {
+    def ADTRA : TernaryRRFa<"adtra", 0xB3D2, FP64,  FP64,  FP64>;
+    def AXTRA : TernaryRRFa<"axtra", 0xB3DA, FP128, FP128, FP128>;
+  }
+}
+
+// Subtraction.
+let Defs = [CC] in {
+  def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64,  FP64,  FP64>;
+  def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>;
+  let Predicates = [FeatureFPExtension] in {
+    def SDTRA : TernaryRRFa<"sdtra", 0xB3D3, FP64,  FP64,  FP64>;
+    def SXTRA : TernaryRRFa<"sxtra", 0xB3DB, FP128, FP128, FP128>;
+  }
+}
+
+// Multiplication.
+let isCommutable = 1 in {
+  def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64,  FP64,  FP64>;
+  def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>;
+}
+let Predicates = [FeatureFPExtension] in {
+  def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64,  FP64,  FP64>;
+  def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>;
+}
+
+// Division.
+def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64,  FP64,  FP64>;
+def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>;
+let Predicates = [FeatureFPExtension] in {
+  def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64,  FP64,  FP64>;
+  def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>;
+}
+
+// Quantize.
+def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64,  FP64,  FP64>;
+def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>;
+
+// Reround.
+def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64,  FP64,  FP64>;
+def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>;
+
+// Shift significand left/right.
+def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64,  FP64,  null_frag, 0>;
+def SLXT : BinaryRXF<"slxt", 0xED48, null_frag, FP128, FP128, null_frag, 0>;
+def SRDT : BinaryRXF<"srdt", 0xED41, null_frag, FP64,  FP64,  null_frag, 0>;
+def SRXT : BinaryRXF<"srxt", 0xED49, null_frag, FP128, FP128, null_frag, 0>;
+
+// Insert biased exponent.
+def IEDTR : BinaryRRFb<"iedtr", 0xB3F6, null_frag, FP64,  FP64,   FP64>;
+def IEXTR : BinaryRRFb<"iextr", 0xB3FE, null_frag, FP128, FP128, FP128>;
+
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare.
+let Defs = [CC] in {
+  def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64,  FP64>;
+  def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>;
+}
+
+// Compare and signal.
+let Defs = [CC] in {
+  def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64,  FP64>;
+  def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>;
+}
+
+// Compare biased exponent.
+let Defs = [CC] in {
+  def CEDTR : CompareRRE<"cedtr", 0xB3F4, null_frag, FP64,  FP64>;
+  def CEXTR : CompareRRE<"cextr", 0xB3FC, null_frag, FP128, FP128>;
+}
+
+// Test Data Class.
+let Defs = [CC] in {
+  def TDCET : TestRXE<"tdcet", 0xED50, null_frag, FP32>;
+  def TDCDT : TestRXE<"tdcdt", 0xED54, null_frag, FP64>;
+  def TDCXT : TestRXE<"tdcxt", 0xED58, null_frag, FP128>;
+}
+
+// Test Data Group.
+let Defs = [CC] in {
+  def TDGET : TestRXE<"tdget", 0xED51, null_frag, FP32>;
+  def TDGDT : TestRXE<"tdgdt", 0xED55, null_frag, FP64>;
+  def TDGXT : TestRXE<"tdgxt", 0xED59, null_frag, FP128>;
+}
+
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 364b81f98eed6..10172bd452034 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -121,7 +121,8 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
   defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
 
   // For z13 we prefer LDE over LE to avoid partial register dependencies.
-  def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
+  let isCodeGenOnly = 1 in
+    def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
 
   // These instructions are split after register allocation, so we don't
   // want a custom inserter.
@@ -437,18 +438,18 @@ def : Pat<(fmul (f128 (fpextend FP64:$src1)),
                 bdxaddr12only:$addr)>;
 
 // Fused multiply-add.
-def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>;
-def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>;
+def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
+def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>;
 
-def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load, 4>;
-def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load, 8>;
+def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>;
+def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>;
 
 // Fused multiply-subtract.
-def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>;
-def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>;
+def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>;
+def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>;
 
-def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load, 4>;
-def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load, 8>;
+def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>;
+def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>;
 
 // Division.
 def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32,  FP32>;
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index a37da28078540..5f6115ed86a47 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -527,6 +527,22 @@ class InstRRFc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   let Inst{3-0}   = R2;
 }
 
+class InstRRFd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
+  field bits<32> Inst;
+  field bits<32> SoftFail = 0;
+
+  bits<4> R1;
+  bits<4> R2;
+  bits<4> M4;
+
+  let Inst{31-16} = op;
+  let Inst{15-12} = 0;
+  let Inst{11-8}  = M4;
+  let Inst{7-4}   = R1;
+  let Inst{3-0}   = R2;
+}
+
 class InstRRFe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   : InstSystemZ<4, outs, ins, asmstr, pattern> {
   field bits<32> Inst;
@@ -725,6 +741,22 @@ class InstRSLa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   let Inst{7-0}   = op{7-0};
 }
 
+class InstRSLb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
+  field bits<48> Inst;
+  field bits<48> SoftFail = 0;
+
+  bits<4> R1;
+  bits<24> BDL2;
+  bits<4> M3;
+
+  let Inst{47-40} = op{15-8};
+  let Inst{39-16} = BDL2;
+  let Inst{15-12} = R1;
+  let Inst{11-8}  = M3;
+  let Inst{7-0}   = op{7-0};
+}
+
 class InstRSYa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
   : InstSystemZ<6, outs, ins, asmstr, pattern> {
   field bits<48> Inst;
@@ -2752,6 +2784,15 @@ class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
   let DisableEncoding = "$R1src";
 }
 
+class BinaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                RegisterOperand cls1, RegisterOperand cls2>
+  : InstRRD<opcode, (outs cls1:$R1), (ins cls2:$R3, cls2:$R2),
+            mnemonic#"\t$R1, $R3, $R2",
+            [(set cls1:$R1, (operator cls2:$R3, cls2:$R2))]> {
+  let OpKey = mnemonic#cls;
+  let OpType = "reg";
+}
+
 class BinaryRRFa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
                  RegisterOperand cls1, RegisterOperand cls2,
                  RegisterOperand cls3>
@@ -2808,6 +2849,11 @@ multiclass BinaryMemRRFcOpt<string mnemonic, bits<16> opcode,
   def Opt : UnaryMemRRFc<mnemonic, opcode, cls1, cls2>;
 }
 
+class BinaryRRFd<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+                RegisterOperand cls2>
+  : InstRRFd<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M4),
+             mnemonic#"\t$R1, $R2, $M4", []>;
+
 class BinaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1,
                 RegisterOperand cls2>
   : InstRRFe<opcode, (outs cls1:$R1), (ins imm32zx4:$M3, cls2:$R2),
@@ -2958,6 +3004,13 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
   }
 }
 
+class BinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls>
+  : InstRSLb<opcode, (outs cls:$R1),
+             (ins bdladdr12onlylen8:$BDL2, imm32zx4:$M3),
+             mnemonic#"\t$R1, $BDL2, $M3", []> {
+  let mayLoad = 1;
+}
+
 class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
                RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
                AddressingMode mode = bdxaddr12only>
@@ -2987,6 +3040,18 @@ class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
   let M3 = 0;
 }
 
+class BinaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                RegisterOperand cls1, RegisterOperand cls2,
+                SDPatternOperator load, bits<5> bytes>
+  : InstRXF<opcode, (outs cls1:$R1), (ins cls2:$R3, bdxaddr12only:$XBD2),
+            mnemonic#"\t$R1, $R3, $XBD2",
+            [(set cls1:$R1, (operator cls2:$R3, (load bdxaddr12only:$XBD2)))]> {
+  let OpKey = mnemonic#"r"#cls;
+  let OpType = "mem";
+  let mayLoad = 1;
+  let AccessBytes = bytes;
+}
+
 class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
                 RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
                 AddressingMode mode = bdxaddr20only>
@@ -3294,6 +3359,13 @@ multiclass StoreBinaryRSPair<string mnemonic, bits<8> rsOpcode,
   }
 }
 
+class StoreBinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls>
+  : InstRSLb<opcode, (outs),
+             (ins cls:$R1, bdladdr12onlylen8:$BDL2, imm32zx4:$M3),
+             mnemonic#"\t$R1, $BDL2, $M3", []> {
+  let mayStore = 1;
+}
+
 class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
                      Immediate index>
   : InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
@@ -3581,6 +3653,12 @@ class SideEffectTernarySSF<string mnemonic, bits<12> opcode,
             (ins bdaddr12only:$BD1, bdaddr12only:$BD2, cls:$R3),
             mnemonic#"\t$BD1, $BD2, $R3", []>;
 
+class TernaryRRFa<string mnemonic, bits<16> opcode,
+                 RegisterOperand cls1, RegisterOperand cls2,
+                 RegisterOperand cls3>
+  : InstRRFa<opcode, (outs cls1:$R1), (ins cls2:$R2, cls3:$R3, imm32zx4:$M4),
+             mnemonic#"\t$R1, $R2, $R3, $M4", []>;
+
 class TernaryRRFb<string mnemonic, bits<16> opcode,
                   RegisterOperand cls1, RegisterOperand cls2,
                   RegisterOperand cls3>
@@ -3597,11 +3675,11 @@ class TernaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1,
              (ins imm32zx4:$M3, cls2:$R2, imm32zx4:$M4),
              mnemonic#"\t$R1, $M3, $R2, $M4", []>;
 
-class TernaryRRD<string mnemonic, bits<16> opcode,
-                 SDPatternOperator operator, RegisterOperand cls>
-  : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2),
+class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+                 RegisterOperand cls1, RegisterOperand cls2>
+  : InstRRD<opcode, (outs cls1:$R1), (ins cls2:$R1src, cls2:$R3, cls2:$R2),
             mnemonic#"\t$R1, $R3, $R2",
-            [(set cls:$R1, (operator cls:$R1src, cls:$R3, cls:$R2))]> {
+            [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3, cls2:$R2))]> {
   let OpKey = mnemonic#cls;
   let OpType = "reg";
   let Constraints = "$R1 = $R1src";
@@ -3661,12 +3739,13 @@ class SideEffectTernaryMemMemRSY<string mnemonic, bits<16> opcode,
 }
 
 class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-                 RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
-  : InstRXF<opcode, (outs cls:$R1),
-            (ins cls:$R1src, cls:$R3, bdxaddr12only:$XBD2),
+                 RegisterOperand cls1, RegisterOperand cls2,
+                 SDPatternOperator load, bits<5> bytes>
+  : InstRXF<opcode, (outs cls1:$R1),
+            (ins cls2:$R1src, cls2:$R3, bdxaddr12only:$XBD2),
             mnemonic#"\t$R1, $R3, $XBD2",
-            [(set cls:$R1, (operator cls:$R1src, cls:$R3,
-                                     (load bdxaddr12only:$XBD2)))]> {
+            [(set cls1:$R1, (operator cls2:$R1src, cls2:$R3,
+                                      (load bdxaddr12only:$XBD2)))]> {
   let OpKey = mnemonic#"r"#cls;
   let OpType = "mem";
   let Constraints = "$R1 = $R1src";
diff --git a/lib/Target/SystemZ/SystemZInstrHFP.td b/lib/Target/SystemZ/SystemZInstrHFP.td
new file mode 100644
index 0000000000000..6d5b4b92f6508
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZInstrHFP.td
@@ -0,0 +1,240 @@
+//==- SystemZInstrHFP.td - Floating-point SystemZ instructions -*- tblgen-*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The instructions in this file implement SystemZ hexadecimal floating-point
+// arithmetic.  Since this format is not mapped to any source-language data
+// type, these instructions are not used for code generation, but are provided
+// for use with the assembler and disassembler only.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and test.
+let Defs = [CC] in {
+  def LTER : UnaryRR <"lter", 0x32,   null_frag, FP32,  FP32>;
+  def LTDR : UnaryRR <"ltdr", 0x22,   null_frag, FP64,  FP64>;
+  def LTXR : UnaryRRE<"ltxr", 0xB362, null_frag, FP128, FP128>;
+}
+
+//===----------------------------------------------------------------------===//
+// Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Convert floating-point values to narrower representations.
+def LEDR : UnaryRR <"ledr", 0x35,   null_frag, FP32, FP64>;
+def LEXR : UnaryRRE<"lexr", 0xB366, null_frag, FP32, FP128>;
+def LDXR : UnaryRR <"ldxr", 0x25,   null_frag, FP64, FP128>;
+let isAsmParserOnly = 1 in {
+  def LRER : UnaryRR <"lrer", 0x35, null_frag, FP32, FP64>;
+  def LRDR : UnaryRR <"lrdr", 0x25, null_frag, FP64, FP128>;
+}
+
+// Extend floating-point values to wider representations.
+def LDER : UnaryRRE<"lder", 0xB324, null_frag, FP64,  FP32>;
+def LXER : UnaryRRE<"lxer", 0xB326, null_frag, FP128, FP32>;
+def LXDR : UnaryRRE<"lxdr", 0xB325, null_frag, FP128, FP64>;
+
+def LDE : UnaryRXE<"lde", 0xED24, null_frag, FP64,  4>;
+def LXE : UnaryRXE<"lxe", 0xED26, null_frag, FP128, 4>;
+def LXD : UnaryRXE<"lxd", 0xED25, null_frag, FP128, 8>;
+
+// Convert a signed integer register value to a floating-point one.
+def CEFR : UnaryRRE<"cefr", 0xB3B4, null_frag, FP32,  GR32>;
+def CDFR : UnaryRRE<"cdfr", 0xB3B5, null_frag, FP64,  GR32>;
+def CXFR : UnaryRRE<"cxfr", 0xB3B6, null_frag, FP128, GR32>;
+
+def CEGR : UnaryRRE<"cegr", 0xB3C4, null_frag, FP32,  GR64>;
+def CDGR : UnaryRRE<"cdgr", 0xB3C5, null_frag, FP64,  GR64>;
+def CXGR : UnaryRRE<"cxgr", 0xB3C6, null_frag, FP128, GR64>;
+
+// Convert a floating-point register value to a signed integer value,
+// with the second operand (modifier M3) specifying the rounding mode.
+let Defs = [CC] in {
+  def CFER : BinaryRRFe<"cfer", 0xB3B8, GR32, FP32>;
+  def CFDR : BinaryRRFe<"cfdr", 0xB3B9, GR32, FP64>;
+  def CFXR : BinaryRRFe<"cfxr", 0xB3BA, GR32, FP128>;
+
+  def CGER : BinaryRRFe<"cger", 0xB3C8, GR64, FP32>;
+  def CGDR : BinaryRRFe<"cgdr", 0xB3C9, GR64, FP64>;
+  def CGXR : BinaryRRFe<"cgxr", 0xB3CA, GR64, FP128>;
+}
+
+// Convert BFP to HFP.
+let Defs = [CC] in {
+  def THDER : UnaryRRE<"thder", 0xB358, null_frag, FP64, FP32>;
+  def THDR  : UnaryRRE<"thdr",  0xB359, null_frag, FP64, FP64>;
+}
+
+// Convert HFP to BFP.
+let Defs = [CC] in {
+  def TBEDR : BinaryRRFe<"tbedr", 0xB350, FP32, FP64>;
+  def TBDR  : BinaryRRFe<"tbdr",  0xB351, FP64, FP64>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Negation (Load Complement).
+let Defs = [CC] in {
+  def LCER : UnaryRR <"lcer", 0x33,   null_frag, FP32,  FP32>;
+  def LCDR : UnaryRR <"lcdr", 0x23,   null_frag, FP64,  FP64>;
+  def LCXR : UnaryRRE<"lcxr", 0xB363, null_frag, FP128, FP128>;
+}
+
+// Absolute value (Load Positive).
+let Defs = [CC] in {
+  def LPER : UnaryRR <"lper", 0x30,   null_frag, FP32,  FP32>;
+  def LPDR : UnaryRR <"lpdr", 0x20,   null_frag, FP64,  FP64>;
+  def LPXR : UnaryRRE<"lpxr", 0xB360, null_frag, FP128, FP128>;
+}
+
+// Negative absolute value (Load Negative).
+let Defs = [CC] in {
+  def LNER : UnaryRR <"lner", 0x31,   null_frag, FP32,  FP32>;
+  def LNDR : UnaryRR <"lndr", 0x21,   null_frag, FP64,  FP64>;
+  def LNXR : UnaryRRE<"lnxr", 0xB361, null_frag, FP128, FP128>;
+}
+
+// Halve.
+def HER : UnaryRR <"her", 0x34, null_frag, FP32, FP32>;
+def HDR : UnaryRR <"hdr", 0x24, null_frag, FP64, FP64>;
+
+// Square root.
+def SQER : UnaryRRE<"sqer", 0xB245, null_frag, FP32,  FP32>;
+def SQDR : UnaryRRE<"sqdr", 0xB244, null_frag, FP64,  FP64>;
+def SQXR : UnaryRRE<"sqxr", 0xB336, null_frag, FP128, FP128>;
+
+def SQE : UnaryRXE<"sqe", 0xED34, null_frag, FP32, 4>;
+def SQD : UnaryRXE<"sqd", 0xED35, null_frag, FP64, 8>;
+
+// Round to an integer (rounding towards zero).
+def FIER : UnaryRRE<"fier", 0xB377, null_frag, FP32,  FP32>;
+def FIDR : UnaryRRE<"fidr", 0xB37F, null_frag, FP64,  FP64>;
+def FIXR : UnaryRRE<"fixr", 0xB367, null_frag, FP128, FP128>;
+
+
+//===----------------------------------------------------------------------===//
+// Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition.
+let Defs = [CC] in {
+  let isCommutable = 1 in {
+    def AER : BinaryRR<"aer", 0x3A, null_frag, FP32,  FP32>;
+    def ADR : BinaryRR<"adr", 0x2A, null_frag, FP64,  FP64>;
+    def AXR : BinaryRR<"axr", 0x36, null_frag, FP128, FP128>;
+  }
+  def AE : BinaryRX<"ae", 0x7A, null_frag, FP32, load, 4>;
+  def AD : BinaryRX<"ad", 0x6A, null_frag, FP64, load, 8>;
+}
+
+// Addition (unnormalized).
+let Defs = [CC] in {
+  let isCommutable = 1 in {
+    def AUR : BinaryRR<"aur", 0x3E, null_frag, FP32, FP32>;
+    def AWR : BinaryRR<"awr", 0x2E, null_frag, FP64, FP64>;
+  }
+  def AU : BinaryRX<"au", 0x7E, null_frag, FP32, load, 4>;
+  def AW : BinaryRX<"aw", 0x6E, null_frag, FP64, load, 8>;
+}
+
+// Subtraction.
+let Defs = [CC] in {
+  def SER : BinaryRR<"ser", 0x3B, null_frag, FP32,  FP32>;
+  def SDR : BinaryRR<"sdr", 0x2B, null_frag, FP64,  FP64>;
+  def SXR : BinaryRR<"sxr", 0x37, null_frag, FP128, FP128>;
+
+  def SE : BinaryRX<"se", 0x7B, null_frag, FP32, load, 4>;
+  def SD : BinaryRX<"sd", 0x6B, null_frag, FP64, load, 8>;
+}
+
+// Subtraction (unnormalized).
+let Defs = [CC] in {
+  def SUR : BinaryRR<"sur", 0x3F, null_frag, FP32, FP32>;
+  def SWR : BinaryRR<"swr", 0x2F, null_frag, FP64, FP64>;
+
+  def SU : BinaryRX<"su", 0x7F, null_frag, FP32, load, 4>;
+  def SW : BinaryRX<"sw", 0x6F, null_frag, FP64, load, 8>;
+}
+
+// Multiplication.
+let isCommutable = 1 in {
+  def MEER : BinaryRRE<"meer", 0xB337, null_frag, FP32,  FP32>;
+  def MDR  : BinaryRR <"mdr",  0x2C,   null_frag, FP64,  FP64>;
+  def MXR  : BinaryRR <"mxr",  0x26,   null_frag, FP128, FP128>;
+}
+def MEE : BinaryRXE<"mee", 0xED37, null_frag, FP32, load, 4>;
+def MD  : BinaryRX <"md",  0x6C,   null_frag, FP64, load, 8>;
+
+// Extending multiplication (f32 x f32 -> f64).
+def MDER : BinaryRR<"mder", 0x3C, null_frag, FP64, FP32>;
+def MDE  : BinaryRX<"mde",  0x7C, null_frag, FP64, load, 4>;
+let isAsmParserOnly = 1 in {
+  def MER : BinaryRR<"mer", 0x3C, null_frag, FP64, FP32>;
+  def ME  : BinaryRX<"me",  0x7C, null_frag, FP64, load, 4>;
+}
+
+// Extending multiplication (f64 x f64 -> f128).
+def MXDR : BinaryRR<"mxdr", 0x27, null_frag, FP128, FP64>;
+def MXD  : BinaryRX<"mxd",  0x67, null_frag, FP128, load, 8>;
+
+// Fused multiply-add.
+def MAER : TernaryRRD<"maer", 0xB32E, null_frag, FP32, FP32>;
+def MADR : TernaryRRD<"madr", 0xB33E, null_frag, FP64, FP64>;
+def MAE  : TernaryRXF<"mae",  0xED2E, null_frag, FP32, FP32, load, 4>;
+def MAD  : TernaryRXF<"mad",  0xED3E, null_frag, FP64, FP64, load, 8>;
+
+// Fused multiply-subtract.
+def MSER : TernaryRRD<"mser", 0xB32F, null_frag, FP32, FP32>;
+def MSDR : TernaryRRD<"msdr", 0xB33F, null_frag, FP64, FP64>;
+def MSE  : TernaryRXF<"mse",  0xED2F, null_frag, FP32, FP32, load, 4>;
+def MSD  : TernaryRXF<"msd",  0xED3F, null_frag, FP64, FP64, load, 8>;
+
+// Multiplication (unnormalized).
+def MYR  : BinaryRRD<"myr",  0xB33B, null_frag, FP128, FP64>;
+def MYHR : BinaryRRD<"myhr", 0xB33D, null_frag, FP64,  FP64>;
+def MYLR : BinaryRRD<"mylr", 0xB339, null_frag, FP64,  FP64>;
+def MY   : BinaryRXF<"my",   0xED3B, null_frag, FP128, FP64, load, 8>;
+def MYH  : BinaryRXF<"myh",  0xED3D, null_frag, FP64,  FP64, load, 8>;
+def MYL  : BinaryRXF<"myl",  0xED39, null_frag, FP64,  FP64, load, 8>;
+
+// Fused multiply-add (unnormalized).
+def MAYR  : TernaryRRD<"mayr",  0xB33A, null_frag, FP128, FP64>;
+def MAYHR : TernaryRRD<"mayhr", 0xB33C, null_frag, FP64,  FP64>;
+def MAYLR : TernaryRRD<"maylr", 0xB338, null_frag, FP64,  FP64>;
+def MAY   : TernaryRXF<"may",   0xED3A, null_frag, FP128, FP64, load, 8>;
+def MAYH  : TernaryRXF<"mayh",  0xED3C, null_frag, FP64,  FP64, load, 8>;
+def MAYL  : TernaryRXF<"mayl",  0xED38, null_frag, FP64,  FP64, load, 8>;
+
+// Division.
+def DER : BinaryRR <"der", 0x3D,   null_frag, FP32,  FP32>;
+def DDR : BinaryRR <"ddr", 0x2D,   null_frag, FP64,  FP64>;
+def DXR : BinaryRRE<"dxr", 0xB22D, null_frag, FP128, FP128>;
+def DE  : BinaryRX <"de",  0x7D,   null_frag, FP32, load, 4>;
+def DD  : BinaryRX <"dd",  0x6D,   null_frag, FP64, load, 8>;
+
+
+//===----------------------------------------------------------------------===//
+// Comparisons
+//===----------------------------------------------------------------------===//
+
+let Defs = [CC] in {
+  def CER : CompareRR <"cer", 0x39,   null_frag, FP32,  FP32>;
+  def CDR : CompareRR <"cdr", 0x29,   null_frag, FP64,  FP64>;
+  def CXR : CompareRRE<"cxr", 0xB369, null_frag, FP128, FP128>;
+
+  def CE : CompareRX<"ce", 0x79, null_frag, FP32, load, 4>;
+  def CD : CompareRX<"cd", 0x69, null_frag, FP64, load, 8>;
+}
+
diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td
index 612c3b6cf96e8..5f5f2f690e58a 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ13.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -908,6 +908,238 @@ def : InstRW<[FXa, Lat30, GroupAlone], (instregex "SFASR$")>;
 def : InstRW<[FXa, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
 def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>;
 
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[VecBF], (instregex "LEXR$")>;
+def : InstRW<[VecDF2, VecDF2], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXb], (instregex "LDER$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[VecBF], (instregex "THD(E)?R$")>;
+def : InstRW<[VecBF], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[VecBF], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)$")>;
+def : InstRW<[VecFPd], (instregex "SQ(E|D)R$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[VecBF], (instregex "FIER$")>;
+def : InstRW<[VecBF], (instregex "FIDR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[VecBF], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)?$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MY(H|L)?R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[VecBF], (instregex "M(A|S)DR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+
+// Division
+def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>;
+def : InstRW<[VecFPd], (instregex "D(E|D)R$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecXsPm, LSU, Lat8], (instregex "C(E|D)$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "C(E|D)R$")>;
+def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[VecDF], (instregex "LTDTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecDF, Lat15], (instregex "LEDTR$")>;
+def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[VecDF], (instregex "LDETR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CD(F|G)TR(A)?$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat30, GroupAlone], (instregex "CXL(F|G)TR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[FXb, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, BeginGroup], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>;
+def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CZDT$")>;
+def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CZXT$")>;
+
+// Convert from / to packed
+def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDPT$")>;
+def : InstRW<[LSU, LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CXPT$")>;
+def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>;
+def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[VecDF], (instregex "FIDTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEDTR$")>;
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "ESDTR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecDF], (instregex "ADTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[VecDF], (instregex "SDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[VecDF, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[VecDF, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[VecDF], (instregex "QADTR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXb, VecDF, Lat11], (instregex "RRDTR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, VecDF, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXb, VecDF, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecDF], (instregex "(K|C)DTR$")>;
+def : InstRW<[VecDF, VecDF, Lat11, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[VecDF], (instregex "CEDTR$")>;
+def : InstRW<[VecDF], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
 // --------------------------------- Vector --------------------------------- //
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td
index 670df8ff55413..126eac2e2072d 100644
--- a/lib/Target/SystemZ/SystemZScheduleZ196.td
+++ b/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -839,5 +839,224 @@ def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
 def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
 def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
 
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)R$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[FPU], (instregex "LEXR$")>;
+def : InstRW<[FPU], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXU], (instregex "LDER$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[FPU], (instregex "THD(E)?R$")>;
+def : InstRW<[FPU], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[FPU], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIER$")>;
+def : InstRW<[FPU], (instregex "FIDR$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)$")>;
+def : InstRW<[FPU], (instregex "C(E|D)R$")>;
+def : InstRW<[FPU, FPU, Lat15], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[DFU, Lat20], (instregex "LTDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[DFU, Lat30], (instregex "LEDTR$")>;
+def : InstRW<[DFU, DFU, Lat30], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[DFU, Lat20], (instregex "LDETR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>;
+
+// Perform floating-point operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[DFU, Lat20], (instregex "FIDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>;
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat20, GroupAlone], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[DFU, Lat30], (instregex "ADTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[DFU, Lat30], (instregex "SDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[DFU, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[DFU, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[DFU, Lat30], (instregex "QADTR$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>;
+def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>;
+def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
 }
 
diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td
index 1bdb8779dc72b..d38ca64d2e9b9 100644
--- a/lib/Target/SystemZ/SystemZScheduleZEC12.td
+++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -877,5 +877,230 @@ def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
 def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
 def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
 
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)R$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[FPU], (instregex "LEXR$")>;
+def : InstRW<[FPU], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSU], (instregex "LDE$")>;
+def : InstRW<[FXU], (instregex "LDER$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)R$")>;
+
+// Convert from fixed
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)R$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)R$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)R$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)R$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[FPU], (instregex "THD(E)?R$")>;
+def : InstRW<[FPU], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)ER$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[FPU], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIER$")>;
+def : InstRW<[FPU], (instregex "FIDR$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D|U|W)$")>;
+def : InstRW<[FPU], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXD$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MY(H|L)?R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)ER$")>;
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>;
+def : InstRW<[FPU2, FPU2, GroupAlone], (instregex "MAY(H|L)?R$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)R$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)$")>;
+def : InstRW<[FPU], (instregex "C(E|D)R$")>;
+def : InstRW<[FPU, FPU, Lat15], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[DFU, Lat20], (instregex "LTDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[DFU, Lat30], (instregex "LEDTR$")>;
+def : InstRW<[DFU, DFU, Lat30], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[DFU, Lat20], (instregex "LDETR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CD(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "CX(F|G)TR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat11, GroupAlone], (instregex "CDL(F|G)TR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat11, GroupAlone], (instregex "CXL(F|G)TR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[FXU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat30, GroupAlone], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "CD(S|U)TR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "CX(S|U)TR$")>;
+def : InstRW<[FXU, DFU, Lat12, GroupAlone], (instregex "C(S|U)DTR$")>;
+def : InstRW<[FXU, FXU, DFU2, DFU2, Lat20, GroupAlone], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[LSU, DFU2, Lat7, GroupAlone], (instregex "CDZT$")>;
+def : InstRW<[LSU, LSU, DFU2, DFU2, Lat10, GroupAlone], (instregex "CXZT$")>;
+def : InstRW<[FXU, LSU, DFU, Lat11, GroupAlone], (instregex "CZDT$")>;
+def : InstRW<[FXU, LSU, DFU, DFU, Lat15, GroupAlone], (instregex "CZXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[DFU, Lat20], (instregex "FIDTR$")>;
+def : InstRW<[DFU2, DFU2, Lat20, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEDTR$")>;
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[FXU, DFU, Lat15, GroupAlone], (instregex "ESDTR$")>;
+def : InstRW<[FXU, DFU, DFU, Lat20, GroupAlone], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[DFU, Lat30], (instregex "ADTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[DFU, Lat30], (instregex "SDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[DFU, Lat30], (instregex "MDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[DFU, Lat30], (instregex "DDTR(A)?$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[DFU, Lat30], (instregex "QADTR$")>;
+def : InstRW<[DFU2, DFU2, Lat30, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[FXU, DFU, Lat30], (instregex "RRDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat30, GroupAlone], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[LSU, DFU, Lat11], (instregex "S(L|R)DT$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[FXU, DFU, Lat11], (instregex "IEDTR$")>;
+def : InstRW<[FXU, DFU2, DFU2, Lat15, GroupAlone], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[DFU, Lat11], (instregex "(K|C)DTR$")>;
+def : InstRW<[DFU, DFU, Lat15, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[DFU, Lat8], (instregex "CEDTR$")>;
+def : InstRW<[DFU, Lat9], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[LSU, DFU, Lat15], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[LSU, DFU2, DFU2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>;
+
 }
 
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 022679a7bc180..0ab0c2f259152 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -42,8 +42,10 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
       HasMiscellaneousExtensions(false),
       HasExecutionHint(false), HasLoadAndTrap(false),
       HasTransactionalExecution(false), HasProcessorAssist(false),
+      HasDFPZonedConversion(false),
       HasVector(false), HasLoadStoreOnCond2(false),
       HasLoadAndZeroRightmostByte(false), HasMessageSecurityAssist5(false),
+      HasDFPPackedConversion(false),
       TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
       TLInfo(TM, *this), TSInfo(), FrameLowering() {}
 
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 770dd7cd939ff..36e51921bf2ff 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -47,10 +47,12 @@ protected:
   bool HasLoadAndTrap;
   bool HasTransactionalExecution;
   bool HasProcessorAssist;
+  bool HasDFPZonedConversion;
   bool HasVector;
   bool HasLoadStoreOnCond2;
   bool HasLoadAndZeroRightmostByte;
   bool HasMessageSecurityAssist5;
+  bool HasDFPPackedConversion;
 
 private:
   Triple TargetTriple;
@@ -133,6 +135,9 @@ public:
   // Return true if the target has the processor-assist facility.
   bool hasProcessorAssist() const { return HasProcessorAssist; }
 
+  // Return true if the target has the DFP zoned-conversion facility.
+  bool hasDFPZonedConversion() const { return HasDFPZonedConversion; }
+
   // Return true if the target has the load-and-zero-rightmost-byte facility.
   bool hasLoadAndZeroRightmostByte() const {
     return HasLoadAndZeroRightmostByte;
@@ -142,6 +147,9 @@ public:
   // extension facility 5.
   bool hasMessageSecurityAssist5() const { return HasMessageSecurityAssist5; }
 
+  // Return true if the target has the DFP packed-conversion facility.
+  bool hasDFPPackedConversion() const { return HasDFPPackedConversion; }
+
   // Return true if the target has the vector facility.
   bool hasVector() const { return HasVector; }
 
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 31a5ca1f4cc27..814377003cbcc 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -84,8 +84,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
                     ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
       setCondCodeAction(CC, T, Expand);
     // Expand floating-point library function operators.
-    for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW,
-                    ISD::FREM, ISD::FMA})
+    for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
+                    ISD::FMA})
       setOperationAction(Op, T, Expand);
     // Note supported floating-point library function operators that otherwise
     // default to expand.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 86744b0641323..8d78308afe9df 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -80,6 +80,12 @@ static cl::opt<int> ExperimentalPrefLoopAlignment(
              " of the loop header PC will be 0)."),
     cl::Hidden);
 
+static cl::opt<bool> MulConstantOptimization(
+    "mul-constant-optimization", cl::init(true),
+    cl::desc("Replace 'mul x, Const' with more effective instructions like "
+             "SHIFT, LEA, etc."),
+    cl::Hidden);
+
 /// Call this when the user attempts to do something unsupported, like
 /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
 /// report_fatal_error, so calling code should attempt to recover without
@@ -670,7 +676,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSINCOS,   VT, Expand);
     setOperationAction(ISD::FCOS,      VT, Expand);
     setOperationAction(ISD::FREM,      VT, Expand);
-    setOperationAction(ISD::FPOWI,     VT, Expand);
     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
     setOperationAction(ISD::FPOW,      VT, Expand);
     setOperationAction(ISD::FLOG,      VT, Expand);
@@ -30928,6 +30933,75 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
   }
 }
 
+static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
+                                 EVT VT, SDLoc DL) {
+
+  auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
+    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+                                 DAG.getConstant(Mult, DL, VT));
+    Result = DAG.getNode(ISD::SHL, DL, VT, Result,
+                         DAG.getConstant(Shift, DL, MVT::i8));
+    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, N->getOperand(0),
+                         Result);
+    return Result;
+  };
+
+  auto combineMulMulAddOrSub = [&](bool isAdd) {
+    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+                                 DAG.getConstant(9, DL, VT));
+    Result = DAG.getNode(ISD::MUL, DL, VT, Result, DAG.getConstant(3, DL, VT));
+    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, N->getOperand(0),
+                         Result);
+    return Result;
+  };
+
+  switch (MulAmt) {
+  default:
+    break;
+  case 11:
+    // mul x, 11 => add ((shl (mul x, 5), 1), x)
+    return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
+  case 21:
+    // mul x, 21 => add ((shl (mul x, 5), 2), x)
+    return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
+  case 22:
+    // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
+    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+                       combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
+  case 19:
+    // mul x, 19 => sub ((shl (mul x, 5), 2), x)
+    return combineMulShlAddOrSub(5, 2, /*isAdd*/ false);
+  case 13:
+    // mul x, 13 => add ((shl (mul x, 3), 2), x)
+    return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
+  case 23:
+    // mul x, 13 => sub ((shl (mul x, 3), 3), x)
+    return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
+  case 14:
+    // mul x, 14 => add (add ((shl (mul x, 3), 2), x), x)
+    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+                       combineMulShlAddOrSub(3, 2, /*isAdd*/ true));
+  case 26:
+    // mul x, 26 => sub ((mul (mul x, 9), 3), x)
+    return combineMulMulAddOrSub(/*isAdd*/ false);
+  case 28:
+    // mul x, 28 => add ((mul (mul x, 9), 3), x)
+    return combineMulMulAddOrSub(/*isAdd*/ true);
+  case 29:
+    // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
+    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
+                       combineMulMulAddOrSub(/*isAdd*/ true));
+  case 30:
+    // mul x, 30 => sub (sub ((shl x, 5), x), x)
+    return DAG.getNode(
+        ISD::SUB, DL, VT, N->getOperand(0),
+        DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
+                    DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                                DAG.getConstant(5, DL, MVT::i8))));
+  }
+  return SDValue();
+}
+
 /// Optimize a single multiply with constant into two operations in order to
 /// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
 static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
@@ -30937,6 +31011,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
   if (DCI.isBeforeLegalize() && VT.isVector())
     return reduceVMULWidth(N, DAG, Subtarget);
 
+  if (!MulConstantOptimization)
+    return SDValue();
   // An imul is usually smaller than the alternative sequence.
   if (DAG.getMachineFunction().getFunction()->optForMinSize())
     return SDValue();
@@ -30992,7 +31068,8 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
     else
       NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
                            DAG.getConstant(MulAmt2, DL, VT));
-  }
+  } else if (!Subtarget.slowLEA())
+    NewMul = combineMulSpecial(MulAmt, N, DAG, VT, DL);
 
   if (!NewMul) {
     assert(MulAmt != 0 &&
author	Dimitry Andric <dim@FreeBSD.org>	2017-05-30 17:37:31 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-05-30 17:37:31 +0000
commit	ee2f195dd3e40f49698ca4dc2666ec09c770e80d (patch)
tree	66fa9a69e5789356dfe844991e64bac9222f3a35 /lib/Target
parent	ab44ce3d598882e51a25eb82eb7ae6308de85ae6 (diff)