vendor/llvm/llvm-trunk-r306956

author: Dimitry Andric <dim@FreeBSD.org> 2017-07-01 13:22:02 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-07-01 13:22:02 +0000
commit: 9df3605dea17e84f8183581f6103bd0c79e2a606 (patch)
tree: 70a2f36ce9eb9bb213603cd7f2f120af53fc176f /lib/Target/AMDGPU
parent: 08bbd35a80bf7765fe0d3043f9eb5a2f2786b649 (diff)
16 files changed, 119 insertions, 201 deletions
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 55d18c3f3646b..5a799b2d88d0d 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -36,7 +36,6 @@ FunctionPass *createR600ControlFlowFinalizer();
 FunctionPass *createAMDGPUCFGStructurizerPass();
 
 // SI Passes
-FunctionPass *createSITypeRewriter();
 FunctionPass *createSIAnnotateControlFlowPass();
 FunctionPass *createSIFoldOperandsPass();
 FunctionPass *createSIPeepholeSDWAPass();
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index 7494e5decd6f6..f1d899c4d0039 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -262,8 +262,8 @@ def FeatureSDWAMac : SubtargetFeature<"sdwa-mav",
   "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension"
 >;
 
-def FeatureSDWAClampVOPC : SubtargetFeature<"sdwa-clamp-vopc",
-  "HasSDWAClampVOPC",
+def FeatureSDWAOutModsVOPC : SubtargetFeature<"sdwa-out-mods-vopc",
+  "HasSDWAOutModsVOPC",
   "true",
   "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension"
 >;
@@ -452,7 +452,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
    FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
    FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
    FeatureScalarStores, FeatureInv2PiInlineImm,
-   FeatureSDWA, FeatureSDWAClampVOPC, FeatureSDWAMac, FeatureDPP
+   FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP
   ]
 >;
 
diff --git a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 2071b6f157cd3..9a391d06c9ea9 100644
--- a/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -1776,7 +1776,7 @@ static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
                                           E = EndMBB->succ_end();
          PI != E; ++PI) {
       // Either we have a back-edge to the entry block, or a back-edge to the
-      // succesor of the entry block since the block may be split.
+      // successor of the entry block since the block may be split.
       if ((*PI) != StartMBB &&
           !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
         Succs.insert(
@@ -1831,7 +1831,7 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
   IfBB->addSuccessor(CodeBBStart);
 
   DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
-  // Ensure that the MergeBB is a succesor of the CodeEndBB.
+  // Ensure that the MergeBB is a successor of the CodeEndBB.
   if (!CodeBBEnd->isSuccessor(MergeBB))
     CodeBBEnd->addSuccessor(MergeBB);
 
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index ab5abf2039a5b..be47b900c6f06 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -128,7 +128,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     HasSDWAScalar(false),
     HasSDWASdst(false),
     HasSDWAMac(false),
-    HasSDWAClampVOPC(false),
+    HasSDWAOutModsVOPC(false),
     HasDPP(false),
     FlatAddressSpace(false),
     FlatInstOffsets(false),
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 2b16289c723ef..22cede59086ab 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -153,7 +153,7 @@ protected:
   bool HasSDWAScalar;
   bool HasSDWASdst;
   bool HasSDWAMac;
-  bool HasSDWAClampVOPC;
+  bool HasSDWAOutModsVOPC;
   bool HasDPP;
   bool FlatAddressSpace;
   bool FlatInstOffsets;
@@ -452,8 +452,8 @@ public:
     return HasSDWAMac;
   }
 
-  bool hasSDWAClampVOPC() const {
-    return HasSDWAClampVOPC;
+  bool hasSDWAOutModsVOPC() const {
+    return HasSDWAOutModsVOPC;
   }
 
   /// \brief Returns the offset in bytes from the start of the input buffer
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 04fe9f689806c..425fd35d47de6 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -720,7 +720,6 @@ bool GCNPassConfig::addPreISel() {
     addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
   }
   addPass(createSinkingPass());
-  addPass(createSITypeRewriter());
   addPass(createAMDGPUAnnotateUniformValues());
   if (!LateCFGStructurize) {
     addPass(createSIAnnotateControlFlowPass());
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 88245b01683a5..89a03902dc69b 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -63,7 +63,7 @@ static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
   return false;
 }
 
-void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
+void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP) {
   UP.Threshold = 300; // Twice the default.
   UP.MaxCount = UINT_MAX;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 485e20411ab49..9a320bdfcc3d4 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -68,7 +68,8 @@ public:
 
   bool hasBranchDivergence() { return true; }
 
-  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+                               TTI::UnrollingPreferences &UP);
 
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
     assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index e30844f082cdd..917d9cfa69054 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -96,7 +96,6 @@ add_llvm_target(AMDGPUCodeGen
   SIPeepholeSDWA.cpp
   SIRegisterInfo.cpp
   SIShrinkInstructions.cpp
-  SITypeRewriter.cpp
   SIWholeQuadMode.cpp
   GCNIterativeScheduler.cpp
   GCNMinRegStrategy.cpp
diff --git a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 04308fb3aaf64..f26e49295e690 100644
--- a/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -626,7 +626,9 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
   using namespace AMDGPU::SDWA;
 
   if (STI.getFeatureBits()[AMDGPU::FeatureGFX9]) {
-    if (SDWA9EncValues::SRC_VGPR_MIN <= Val &&
+    // XXX: static_cast<int> is needed to avoid stupid warning:
+    // compare with unsigned is always true
+    if (SDWA9EncValues::SRC_VGPR_MIN <= static_cast<int>(Val) &&
         Val <= SDWA9EncValues::SRC_VGPR_MAX) {
       return createRegOperand(getVgprClassId(Width),
                               Val - SDWA9EncValues::SRC_VGPR_MIN);
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index d0f4e00994de1..d39b345bdf032 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4314,6 +4314,23 @@ SDValue SITargetLowering::splitBinaryBitConstantOp(
   return SDValue();
 }
 
+// Returns true if argument is a boolean value which is not serialized into
+// memory or argument and does not require v_cmdmask_b32 to be deserialized.
+static bool isBoolSGPR(SDValue V) {
+  if (V.getValueType() != MVT::i1)
+    return false;
+  switch (V.getOpcode()) {
+  default: break;
+  case ISD::SETCC:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case AMDGPUISD::FP_CLASS:
+    return true;
+  }
+  return false;
+}
+
 SDValue SITargetLowering::performAndCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   if (DCI.isBeforeLegalize())
@@ -4402,6 +4419,16 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
     }
   }
 
+  if (VT == MVT::i32 &&
+      (RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) {
+    // and x, (sext cc from i1) => select cc, x, 0
+    if (RHS.getOpcode() != ISD::SIGN_EXTEND)
+      std::swap(LHS, RHS);
+    if (isBoolSGPR(RHS.getOperand(0)))
+      return DAG.getSelect(SDLoc(N), MVT::i32, RHS.getOperand(0),
+                           LHS, DAG.getConstant(0, SDLoc(N), MVT::i32));
+  }
+
   return SDValue();
 }
 
@@ -4941,8 +4968,7 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
   case ISD::SIGN_EXTEND:
   case ISD::ANY_EXTEND: {
     auto Cond = RHS.getOperand(0);
-    if (Cond.getOpcode() != ISD::SETCC &&
-        Cond.getOpcode() != AMDGPUISD::FP_CLASS)
+    if (!isBoolSGPR(Cond))
       break;
     SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
     SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
@@ -5109,6 +5135,35 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   EVT VT = LHS.getValueType();
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+  auto CRHS = dyn_cast<ConstantSDNode>(RHS);
+  if (!CRHS) {
+    CRHS = dyn_cast<ConstantSDNode>(LHS);
+    if (CRHS) {
+      std::swap(LHS, RHS);
+      CC = getSetCCSwappedOperands(CC);
+    }
+  }
+
+  if (CRHS && VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND &&
+      isBoolSGPR(LHS.getOperand(0))) {
+    // setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1
+    // setcc (sext from i1 cc), -1, eq|sle|uge) => cc
+    // setcc (sext from i1 cc),  0, eq|sge|ule) => not cc => xor cc, -1
+    // setcc (sext from i1 cc),  0, ne|ugt|slt) => cc
+    if ((CRHS->isAllOnesValue() &&
+         (CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
+        (CRHS->isNullValue() &&
+         (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
+      return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
+                         DAG.getConstant(-1, SL, MVT::i1));
+    if ((CRHS->isAllOnesValue() &&
+         (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
+        (CRHS->isNullValue() &&
+         (CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
+      return LHS.getOperand(0);
+  }
 
   if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
                                            VT != MVT::f16))
@@ -5116,7 +5171,6 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
 
   // Match isinf pattern
   // (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
-  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
   if (CC == ISD::SETOEQ && LHS.getOpcode() == ISD::FABS) {
     const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
     if (!CRHS)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index c9b48fea7225e..b6784ec14e9f8 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -770,7 +770,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
 
     if (ST.hasScalarStores()) {
       // m0 is used for offset to scalar stores if used to spill.
-      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
+      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
     }
 
     return;
@@ -871,7 +871,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
 
     if (ST.hasScalarStores()) {
       // m0 is used for offset to scalar stores if used to spill.
-      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
+      Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine | RegState::Dead);
     }
 
     return;
@@ -2444,8 +2444,6 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     }
 
     int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
-    if ( DstIdx == -1)
-      DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::sdst);
 
     const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
 
@@ -2488,14 +2486,20 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
           ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
           return false;
         }
-      } else if (!ST.hasSDWAClampVOPC()) {
+      } else if (!ST.hasSDWAOutModsVOPC()) {
         // No clamp allowed on GFX9 for VOPC
         const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
-        if (Clamp != nullptr &&
-          (!Clamp->isImm() || Clamp->getImm() != 0)) {
+        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
           ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
           return false;
         }
+
+        // No omod allowed on GFX9 for VOPC
+        const MachineOperand *OMod = getNamedOperand(MI, AMDGPU::OpName::omod);
+        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
+          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
+          return false;
+        }
       }
     }
   }
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 3b4a8b5d1e817..4a81fb3b463a9 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -336,6 +336,10 @@ def NegSubInlineConst16 : ImmLeaf<i16, [{
   return Imm < -16 && Imm >= -64;
 }], NegateImm>;
 
+def ShiftAmt32Imm : PatLeaf <(imm), [{
+  return N->getZExtValue() < 32;
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Custom Operands
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index 3b4bdc864253c..bcc685015cf5e 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -929,6 +929,14 @@ def : UMad24Pat<V_MAD_U32_U24>;
 defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
 def : ROTRPattern <V_ALIGNBIT_B32>;
 
+def : Pat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),
+          (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+                          (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
+
+def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))),
+          (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)),
+                          (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>;
+
 /********** ====================== **********/
 /**********   Indirect addressing  **********/
 /********** ====================== **********/
diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 4ac23ef03cb32..e2ac6631d2f32 100644
--- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -627,10 +627,13 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(const MachineInstr &MI,
         return false;
     }
 
-    if (!ST.hasSDWAClampVOPC() && TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
+    if (!ST.hasSDWAOutModsVOPC() &&
+        (TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) ||
+         TII->hasModifiersSet(MI, AMDGPU::OpName::omod)))
       return false;
 
-  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst)) {
+  } else if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst) ||
+             !TII->getNamedOperand(MI, AMDGPU::OpName::vdst)) {
     return false;
   }
 
@@ -649,25 +652,24 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
     SDWAOpcode = AMDGPU::getSDWAOp(AMDGPU::getVOPe32(MI.getOpcode()));
   assert(SDWAOpcode != -1);
 
-  // Copy dst, if it is present in original then should also be present in SDWA
-  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
-  if (!Dst && !TII->isVOPC(MI))
-    return false;
-
   const MCInstrDesc &SDWADesc = TII->get(SDWAOpcode);
 
   // Create SDWA version of instruction MI and initialize its operands
   MachineInstrBuilder SDWAInst =
     BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), SDWADesc);
 
+  // Copy dst, if it is present in original then should also be present in SDWA
+  MachineOperand *Dst = TII->getNamedOperand(MI, AMDGPU::OpName::vdst);
   if (Dst) {
     assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::vdst) != -1);
     SDWAInst.add(*Dst);
-  } else {
-    Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+  } else if ((Dst = TII->getNamedOperand(MI, AMDGPU::OpName::sdst))) {
     assert(Dst &&
            AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
     SDWAInst.add(*Dst);
+  } else {
+    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::sdst) != -1);
+    SDWAInst.addReg(AMDGPU::VCC, RegState::Define);
   }
 
   // Copy src0, initialize src0_modifiers. All sdwa instructions has src0 and
@@ -714,20 +716,22 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI,
   }
 
   // Copy omod if present, initialize otherwise if needed
-  MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
-  if (OMod) {
-    assert(AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1);
-    SDWAInst.add(*OMod);
-  } else if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
-    SDWAInst.addImm(0);
+  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::omod) != -1) {
+    MachineOperand *OMod = TII->getNamedOperand(MI, AMDGPU::OpName::omod);
+    if (OMod) {
+      SDWAInst.add(*OMod);
+    } else {
+      SDWAInst.addImm(0);
+    }
   }
 
-  // Initialize dst_sel and dst_unused if present
-  if (Dst) {
-    assert(
-      AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1 &&
-      AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1);
+  // Initialize dst_sel if present
+  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_sel) != -1) {
     SDWAInst.addImm(AMDGPU::SDWA::SdwaSel::DWORD);
+  }
+
+  // Initialize dst_unused if present
+  if (AMDGPU::getNamedOperandIdx(SDWAOpcode, AMDGPU::OpName::dst_unused) != -1) {
     SDWAInst.addImm(AMDGPU::SDWA::DstUnused::UNUSED_PAD);
   }
 
diff --git a/lib/Target/AMDGPU/SITypeRewriter.cpp b/lib/Target/AMDGPU/SITypeRewriter.cpp
deleted file mode 100644
index aad68537f7791..0000000000000
--- a/lib/Target/AMDGPU/SITypeRewriter.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-//===-- SITypeRewriter.cpp - Remove unwanted types ------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass removes performs the following type substitution on all
-/// non-compute shaders:
-///
-/// v16i8 => i128
-///   - v16i8 is used for constant memory resource descriptors.  This type is
-///      legal for some compute APIs, and we don't want to declare it as legal
-///      in the backend, because we want the legalizer to expand all v16i8
-///      operations.
-/// v1* => *
-///   - Having v1* types complicates the legalizer and we can easily replace
-///   - them with the element type.
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstVisitor.h"
-
-using namespace llvm;
-
-namespace {
-
-class SITypeRewriter : public FunctionPass,
-                       public InstVisitor<SITypeRewriter> {
-
-  static char ID;
-  Module *Mod;
-  Type *v16i8;
-  Type *v4i32;
-
-public:
-  SITypeRewriter() : FunctionPass(ID) { }
-  bool doInitialization(Module &M) override;
-  bool runOnFunction(Function &F) override;
-  StringRef getPassName() const override { return "SI Type Rewriter"; }
-  void visitLoadInst(LoadInst &I);
-  void visitCallInst(CallInst &I);
-  void visitBitCast(BitCastInst &I);
-};
-
-} // End anonymous namespace
-
-char SITypeRewriter::ID = 0;
-
-bool SITypeRewriter::doInitialization(Module &M) {
-  Mod = &M;
-  v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
-  v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4);
-  return false;
-}
-
-bool SITypeRewriter::runOnFunction(Function &F) {
-  if (!AMDGPU::isShader(F.getCallingConv()))
-    return false;
-
-  visit(F);
-  visit(F);
-
-  return false;
-}
-
-void SITypeRewriter::visitLoadInst(LoadInst &I) {
-  Value *Ptr = I.getPointerOperand();
-  Type *PtrTy = Ptr->getType();
-  Type *ElemTy = PtrTy->getPointerElementType();
-  IRBuilder<> Builder(&I);
-  if (ElemTy == v16i8)  {
-    Value *BitCast = Builder.CreateBitCast(Ptr,
-        PointerType::get(v4i32,PtrTy->getPointerAddressSpace()));
-    LoadInst *Load = Builder.CreateLoad(BitCast);
-    SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
-    I.getAllMetadataOtherThanDebugLoc(MD);
-    for (unsigned i = 0, e = MD.size(); i != e; ++i) {
-      Load->setMetadata(MD[i].first, MD[i].second);
-    }
-    Value *BitCastLoad = Builder.CreateBitCast(Load, I.getType());
-    I.replaceAllUsesWith(BitCastLoad);
-    I.eraseFromParent();
-  }
-}
-
-void SITypeRewriter::visitCallInst(CallInst &I) {
-  IRBuilder<> Builder(&I);
-
-  SmallVector <Value*, 8> Args;
-  SmallVector <Type*, 8> Types;
-  bool NeedToReplace = false;
-  Function *F = I.getCalledFunction();
-  if (!F)
-    return;
-
-  std::string Name = F->getName();
-  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
-    Value *Arg = I.getArgOperand(i);
-    if (Arg->getType() == v16i8) {
-      Args.push_back(Builder.CreateBitCast(Arg, v4i32));
-      Types.push_back(v4i32);
-      NeedToReplace = true;
-      Name = Name + ".v4i32";
-    } else if (Arg->getType()->isVectorTy() &&
-               Arg->getType()->getVectorNumElements() == 1 &&
-               Arg->getType()->getVectorElementType() ==
-                                              Type::getInt32Ty(I.getContext())){
-      Type *ElementTy = Arg->getType()->getVectorElementType();
-      std::string TypeName = "i32";
-      InsertElementInst *Def = cast<InsertElementInst>(Arg);
-      Args.push_back(Def->getOperand(1));
-      Types.push_back(ElementTy);
-      std::string VecTypeName = "v1" + TypeName;
-      Name = Name.replace(Name.find(VecTypeName), VecTypeName.length(), TypeName);
-      NeedToReplace = true;
-    } else {
-      Args.push_back(Arg);
-      Types.push_back(Arg->getType());
-    }
-  }
-
-  if (!NeedToReplace) {
-    return;
-  }
-  Function *NewF = Mod->getFunction(Name);
-  if (!NewF) {
-    NewF = Function::Create(FunctionType::get(F->getReturnType(), Types, false), GlobalValue::ExternalLinkage, Name, Mod);
-    NewF->setAttributes(F->getAttributes());
-  }
-  I.replaceAllUsesWith(Builder.CreateCall(NewF, Args));
-  I.eraseFromParent();
-}
-
-void SITypeRewriter::visitBitCast(BitCastInst &I) {
-  IRBuilder<> Builder(&I);
-  if (I.getDestTy() != v4i32) {
-    return;
-  }
-
-  if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
-    if (Op->getSrcTy() == v4i32) {
-      I.replaceAllUsesWith(Op->getOperand(0));
-      I.eraseFromParent();
-    }
-  }
-}
-
-FunctionPass *llvm::createSITypeRewriter() {
-  return new SITypeRewriter();
-}
author	Dimitry Andric <dim@FreeBSD.org>	2017-07-01 13:22:02 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-07-01 13:22:02 +0000
commit	9df3605dea17e84f8183581f6103bd0c79e2a606 (patch)
tree	70a2f36ce9eb9bb213603cd7f2f120af53fc176f /lib/Target/AMDGPU
parent	08bbd35a80bf7765fe0d3043f9eb5a2f2786b649 (diff)