vendor/llvm/llvm-trunk-r302418

author: Dimitry Andric <dim@FreeBSD.org> 2017-05-08 17:12:57 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-05-08 17:12:57 +0000
commit: c46e6a5940c50058e00c0c5f9123fd82e338d29a (patch)
tree: 89a719d723035c54a190b1f81d329834f1f93336 /lib/Target/AMDGPU
parent: 148779df305667b6942fee7e758fdf81a6498f38 (diff)
8 files changed, 75 insertions, 57 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 2ce23dbf08e6..f473944cd528 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -713,7 +713,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
       S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
       S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
       S_00B84C_EXCP_EN_MSB(0) |
-      S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) |
+      // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
+      S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
       S_00B84C_EXCP_EN(0);
 }
 
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 64e1b8f0d7f0..915d1d9e0e68 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3580,7 +3580,7 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
     const SDValue Op, KnownBits &Known,
     const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
 
-  Known.Zero.clearAllBits(); Known.One.clearAllBits(); // Don't know anything.
+  Known.resetAll(); // Don't know anything.
 
   KnownBits Known2;
   unsigned Opc = Op.getOpcode();
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index a5edc0c3b937..623b2c88ab8f 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -82,25 +82,28 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
   switch (MI.getOpcode()) {
   case TargetOpcode::G_LOAD: {
     // FIXME: Should we be hard coding the size for these mappings?
-    InstructionMapping SSMapping(1, 1,
-      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
-                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
-      2); // Num Operands
-    AltMappings.emplace_back(std::move(SSMapping));
+    const InstructionMapping &SSMapping = getInstructionMapping(
+        1, 1, getOperandsMapping(
+                  {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+                   AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+        2); // Num Operands
+    AltMappings.push_back(&SSMapping);
 
-    InstructionMapping VVMapping(2, 1,
-      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
-                          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
-      2); // Num Operands
-    AltMappings.emplace_back(std::move(VVMapping));
+    const InstructionMapping &VVMapping = getInstructionMapping(
+        2, 1, getOperandsMapping(
+                  {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
+        2); // Num Operands
+    AltMappings.push_back(&VVMapping);
 
     // FIXME: Should this be the pointer-size (64-bits) or the size of the
     // register that will hold the bufffer resourc (128-bits).
-    InstructionMapping VSMapping(3, 1,
-      getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
-                          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
-      2); // Num Operands
-    AltMappings.emplace_back(std::move(VSMapping));
+    const InstructionMapping &VSMapping = getInstructionMapping(
+        3, 1, getOperandsMapping(
+                  {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+                   AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+        2); // Num Operands
+    AltMappings.push_back(&VSMapping);
 
     return AltMappings;
 
@@ -124,13 +127,11 @@ static bool isInstrUniform(const MachineInstr &MI) {
   return AMDGPU::isUniformMMO(MMO);
 }
 
-RegisterBankInfo::InstructionMapping
+const RegisterBankInfo::InstructionMapping &
 AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
 
   const MachineFunction &MF = *MI.getParent()->getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  RegisterBankInfo::InstructionMapping Mapping =
-      InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
   SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
   unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
   unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
@@ -150,32 +151,34 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
 
   OpdsMapping[0] = ValMapping;
   OpdsMapping[1] = PtrMapping;
-  Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+  const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping(
+      1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands());
   return Mapping;
 
   // FIXME: Do we want to add a mapping for FLAT load, or should we just
   // handle that during instruction selection?
 }
 
-RegisterBankInfo::InstructionMapping
+const RegisterBankInfo::InstructionMapping &
 AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
-  RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+  const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
 
   if (Mapping.isValid())
     return Mapping;
 
   const MachineFunction &MF = *MI.getParent()->getParent();
   const MachineRegisterInfo &MRI = MF.getRegInfo();
-  Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
   SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
 
+  bool IsComplete = true;
   switch (MI.getOpcode()) {
-  default: break;
+  default:
+    IsComplete = false;
+    break;
   case AMDGPU::G_CONSTANT: {
     unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
     OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
-    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
-    return Mapping;
+    break;
   }
   case AMDGPU::G_GEP: {
     for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
@@ -185,8 +188,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
       OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
     }
-    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
-    return Mapping;
+    break;
   }
   case AMDGPU::G_STORE: {
     assert(MI.getOperand(0).isReg());
@@ -203,28 +205,27 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
 
     OpdsMapping[0] = ValMapping;
     OpdsMapping[1] = PtrMapping;
-    Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
-    return Mapping;
+    break;
   }
 
   case AMDGPU::G_LOAD:
     return getInstrMappingForLoad(MI);
   }
 
-  unsigned BankID = AMDGPU::SGPRRegBankID;
+  if (!IsComplete) {
+    unsigned BankID = AMDGPU::SGPRRegBankID;
 
-  Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
-  unsigned Size = 0;
-  for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
-    // If the operand is not a register default to the size of the previous
-    // operand.
-    // FIXME: Can't we pull the types from the MachineInstr rather than the
-    // operands.
-    if (MI.getOperand(Idx).isReg())
-      Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
-    OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size));
+    unsigned Size = 0;
+    for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
+      // If the operand is not a register default to the size of the previous
+      // operand.
+      // FIXME: Can't we pull the types from the MachineInstr rather than the
+      // operands.
+      if (MI.getOperand(Idx).isReg())
+        Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
+      OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size));
+    }
   }
-  Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
-
-  return Mapping;
+  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
+                               MI.getNumOperands());
 }
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index f13bde87ef2d..7c198a1b8a3f 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -44,7 +44,7 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
   /// See RegisterBankInfo::applyMapping.
   void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
 
-  RegisterBankInfo::InstructionMapping
+  const RegisterBankInfo::InstructionMapping &
   getInstrMappingForLoad(const MachineInstr &MI) const;
 
 public:
@@ -59,7 +59,8 @@ public:
   InstructionMappings
   getInstrAlternativeMappings(const MachineInstr &MI) const override;
 
-  InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+  const InstructionMapping &
+  getInstrMapping(const MachineInstr &MI) const override;
 };
 } // End llvm namespace.
 #endif
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 86e3b37b09e9..1279f845de0e 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -353,7 +353,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
   if (OffsetRegUsed &&
       PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
     BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
-      .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
+      .addReg(PreloadedScratchWaveOffsetReg,
+              MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
   }
 
   if (CopyBuffer && !CopyBufferFirst) {
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 853c8737b464..cc93c27731ff 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1042,6 +1042,7 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
 static void allocateSystemSGPRs(CCState &CCInfo,
                                 MachineFunction &MF,
                                 SIMachineFunctionInfo &Info,
+                                CallingConv::ID CallConv,
                                 bool IsShader) {
   if (Info.hasWorkGroupIDX()) {
     unsigned Reg = Info.addWorkGroupIDX();
@@ -1072,8 +1073,15 @@ static void allocateSystemSGPRs(CCState &CCInfo,
     unsigned PrivateSegmentWaveByteOffsetReg;
 
     if (IsShader) {
-      PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
-      Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+      PrivateSegmentWaveByteOffsetReg =
+        Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
+
+      // This is true if the scratch wave byte offset doesn't have a fixed
+      // location.
+      if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
+        PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
+        Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+      }
     } else
       PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
 
@@ -1310,7 +1318,7 @@ SDValue SITargetLowering::LowerFormalArguments(
 
   // Start adding system SGPRs.
   if (IsEntryFunc)
-    allocateSystemSGPRs(CCInfo, MF, *Info, IsShader);
+    allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader);
 
   reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
 
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 9122cd72d323..b5e3ce3dfe3e 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1087,7 +1087,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore(
            (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
         MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
         if (ContainingLoop) {
-          MachineBasicBlock *TBB = ContainingLoop->getTopBlock();
+          MachineBasicBlock *TBB = ContainingLoop->getHeader();
           BlockWaitcntBrackets *ScoreBracket =
               BlockWaitcntBracketsMap[TBB].get();
           if (!ScoreBracket) {
@@ -1097,7 +1097,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore(
           }
           ScoreBracket->setRevisitLoop(true);
           DEBUG(dbgs() << "set-revisit: block"
-                       << ContainingLoop->getTopBlock()->getNumber() << '\n';);
+                       << ContainingLoop->getHeader()->getNumber() << '\n';);
         }
       }
 
@@ -1758,12 +1758,12 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
     // If we are walking into the block from before the loop, then guarantee
     // at least 1 re-walk over the loop to propagate the information, even if
     // no S_WAITCNT instructions were generated.
-    if (ContainingLoop && ContainingLoop->getTopBlock() == &MBB && J < I &&
+    if (ContainingLoop && ContainingLoop->getHeader() == &MBB && J < I &&
         (BlockWaitcntProcessedSet.find(&MBB) ==
          BlockWaitcntProcessedSet.end())) {
       BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true);
       DEBUG(dbgs() << "set-revisit: block"
-                   << ContainingLoop->getTopBlock()->getNumber() << '\n';);
+                   << ContainingLoop->getHeader()->getNumber() << '\n';);
     }
 
     // Walk over the instructions.
@@ -1774,7 +1774,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
 
     // See if we want to revisit the loop.
     if (ContainingLoop && loopBottom(ContainingLoop) == &MBB) {
-      MachineBasicBlock *EntryBB = ContainingLoop->getTopBlock();
+      MachineBasicBlock *EntryBB = ContainingLoop->getHeader();
       BlockWaitcntBrackets *EntrySB = BlockWaitcntBracketsMap[EntryBB].get();
       if (EntrySB && EntrySB->getRevisitLoop()) {
         EntrySB->setRevisitLoop(false);
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index b6a982aee6be..adebb8c4a1c5 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -122,9 +122,15 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   bool MaySpill = ST.isVGPRSpillingEnabled(*F);
   bool HasStackObjects = FrameInfo.hasStackObjects();
 
-  if (HasStackObjects || MaySpill)
+  if (HasStackObjects || MaySpill) {
     PrivateSegmentWaveByteOffset = true;
 
+    // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
+    if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
+        (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
+      PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
+  }
+
   if (ST.isAmdCodeObjectV2(MF)) {
     if (HasStackObjects || MaySpill)
       PrivateSegmentBuffer = true;
author	Dimitry Andric <dim@FreeBSD.org>	2017-05-08 17:12:57 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2017-05-08 17:12:57 +0000
commit	c46e6a5940c50058e00c0c5f9123fd82e338d29a (patch)
tree	89a719d723035c54a190b1f81d329834f1f93336 /lib/Target/AMDGPU
parent	148779df305667b6942fee7e758fdf81a6498f38 (diff)