summary | refs | log | tree | commit | diff
path: root/lib/Target/AMDGPU
diff options
context:
space:
mode:
author: Dimitry Andric <dim@FreeBSD.org> 2017-05-08 17:12:57 +0000
committer: Dimitry Andric <dim@FreeBSD.org> 2017-05-08 17:12:57 +0000
commit: c46e6a5940c50058e00c0c5f9123fd82e338d29a (patch)
tree: 89a719d723035c54a190b1f81d329834f1f93336 /lib/Target/AMDGPU
parent: 148779df305667b6942fee7e758fdf81a6498f38 (diff)
Notes
Diffstat (limited to 'lib/Target/AMDGPU')
-rw-r--r-- lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 3
-rw-r--r-- lib/Target/AMDGPU/AMDGPUISelLowering.cpp 2
-rw-r--r-- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp 87
-rw-r--r-- lib/Target/AMDGPU/AMDGPURegisterBankInfo.h 5
-rw-r--r-- lib/Target/AMDGPU/SIFrameLowering.cpp 3
-rw-r--r-- lib/Target/AMDGPU/SIISelLowering.cpp 14
-rw-r--r-- lib/Target/AMDGPU/SIInsertWaitcnts.cpp 10
-rw-r--r-- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp 8
8 files changed, 75 insertions, 57 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 2ce23dbf08e6..f473944cd528 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -713,7 +713,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
S_00B84C_EXCP_EN_MSB(0) |
- S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) |
+ // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
+ S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
S_00B84C_EXCP_EN(0);
}
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 64e1b8f0d7f0..915d1d9e0e68 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3580,7 +3580,7 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
- Known.Zero.clearAllBits(); Known.One.clearAllBits(); // Don't know anything.
+ Known.resetAll(); // Don't know anything.
KnownBits Known2;
unsigned Opc = Op.getOpcode();
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index a5edc0c3b937..623b2c88ab8f 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -82,25 +82,28 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
switch (MI.getOpcode()) {
case TargetOpcode::G_LOAD: {
// FIXME: Should we be hard coding the size for these mappings?
- InstructionMapping SSMapping(1, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
- 2); // Num Operands
- AltMappings.emplace_back(std::move(SSMapping));
+ const InstructionMapping &SSMapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+ 2); // Num Operands
+ AltMappings.push_back(&SSMapping);
- InstructionMapping VVMapping(2, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
- 2); // Num Operands
- AltMappings.emplace_back(std::move(VVMapping));
+ const InstructionMapping &VVMapping = getInstructionMapping(
+ 2, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}),
+ 2); // Num Operands
+ AltMappings.push_back(&VVMapping);
// FIXME: Should this be the pointer-size (64-bits) or the size of the
// register that will hold the buffer resource (128-bits)?
- InstructionMapping VSMapping(3, 1,
- getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
- AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
- 2); // Num Operands
- AltMappings.emplace_back(std::move(VSMapping));
+ const InstructionMapping &VSMapping = getInstructionMapping(
+ 3, 1, getOperandsMapping(
+ {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+ AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}),
+ 2); // Num Operands
+ AltMappings.push_back(&VSMapping);
return AltMappings;
@@ -124,13 +127,11 @@ static bool isInstrUniform(const MachineInstr &MI) {
return AMDGPU::isUniformMMO(MMO);
}
-RegisterBankInfo::InstructionMapping
+const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- RegisterBankInfo::InstructionMapping Mapping =
- InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
@@ -150,32 +151,34 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
OpdsMapping[0] = ValMapping;
OpdsMapping[1] = PtrMapping;
- Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping(
+ 1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands());
return Mapping;
// FIXME: Do we want to add a mapping for FLAT load, or should we just
// handle that during instruction selection?
}
-RegisterBankInfo::InstructionMapping
+const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
- RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+ const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI);
if (Mapping.isValid())
return Mapping;
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands());
+ bool IsComplete = true;
switch (MI.getOpcode()) {
- default: break;
+ default:
+ IsComplete = false;
+ break;
case AMDGPU::G_CONSTANT: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
- Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
- return Mapping;
+ break;
}
case AMDGPU::G_GEP: {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
@@ -185,8 +188,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
}
- Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
- return Mapping;
+ break;
}
case AMDGPU::G_STORE: {
assert(MI.getOperand(0).isReg());
@@ -203,28 +205,27 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = ValMapping;
OpdsMapping[1] = PtrMapping;
- Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
- return Mapping;
+ break;
}
case AMDGPU::G_LOAD:
return getInstrMappingForLoad(MI);
}
- unsigned BankID = AMDGPU::SGPRRegBankID;
+ if (!IsComplete) {
+ unsigned BankID = AMDGPU::SGPRRegBankID;
- Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()};
- unsigned Size = 0;
- for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
- // If the operand is not a register default to the size of the previous
- // operand.
- // FIXME: Can't we pull the types from the MachineInstr rather than the
- // operands.
- if (MI.getOperand(Idx).isReg())
- Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
- OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size));
+ unsigned Size = 0;
+ for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) {
+ // If the operand is not a register default to the size of the previous
+ // operand.
+ // FIXME: Can't we pull the types from the MachineInstr rather than the
+ // operands.
+ if (MI.getOperand(Idx).isReg())
+ Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI);
+ OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size));
+ }
}
- Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
-
- return Mapping;
+ return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
+ MI.getNumOperands());
}
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index f13bde87ef2d..7c198a1b8a3f 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -44,7 +44,7 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
- RegisterBankInfo::InstructionMapping
+ const RegisterBankInfo::InstructionMapping &
getInstrMappingForLoad(const MachineInstr &MI) const;
public:
@@ -59,7 +59,8 @@ public:
InstructionMappings
getInstrAlternativeMappings(const MachineInstr &MI) const override;
- InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+ const InstructionMapping &
+ getInstrMapping(const MachineInstr &MI) const override;
};
} // End llvm namespace.
#endif
diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp
index 86e3b37b09e9..1279f845de0e 100644
--- a/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -353,7 +353,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (OffsetRegUsed &&
PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
- .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
+ .addReg(PreloadedScratchWaveOffsetReg,
+ MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
}
if (CopyBuffer && !CopyBufferFirst) {
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 853c8737b464..cc93c27731ff 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1042,6 +1042,7 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
static void allocateSystemSGPRs(CCState &CCInfo,
MachineFunction &MF,
SIMachineFunctionInfo &Info,
+ CallingConv::ID CallConv,
bool IsShader) {
if (Info.hasWorkGroupIDX()) {
unsigned Reg = Info.addWorkGroupIDX();
@@ -1072,8 +1073,15 @@ static void allocateSystemSGPRs(CCState &CCInfo,
unsigned PrivateSegmentWaveByteOffsetReg;
if (IsShader) {
- PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
- Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+ PrivateSegmentWaveByteOffsetReg =
+ Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
+
+ // This is true if the scratch wave byte offset doesn't have a fixed
+ // location.
+ if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
+ PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
+ Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+ }
} else
PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
@@ -1310,7 +1318,7 @@ SDValue SITargetLowering::LowerFormalArguments(
// Start adding system SGPRs.
if (IsEntryFunc)
- allocateSystemSGPRs(CCInfo, MF, *Info, IsShader);
+ allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader);
reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 9122cd72d323..b5e3ce3dfe3e 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1087,7 +1087,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore(
(CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) {
MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent());
if (ContainingLoop) {
- MachineBasicBlock *TBB = ContainingLoop->getTopBlock();
+ MachineBasicBlock *TBB = ContainingLoop->getHeader();
BlockWaitcntBrackets *ScoreBracket =
BlockWaitcntBracketsMap[TBB].get();
if (!ScoreBracket) {
@@ -1097,7 +1097,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore(
}
ScoreBracket->setRevisitLoop(true);
DEBUG(dbgs() << "set-revisit: block"
- << ContainingLoop->getTopBlock()->getNumber() << '\n';);
+ << ContainingLoop->getHeader()->getNumber() << '\n';);
}
}
@@ -1758,12 +1758,12 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
// If we are walking into the block from before the loop, then guarantee
// at least 1 re-walk over the loop to propagate the information, even if
// no S_WAITCNT instructions were generated.
- if (ContainingLoop && ContainingLoop->getTopBlock() == &MBB && J < I &&
+ if (ContainingLoop && ContainingLoop->getHeader() == &MBB && J < I &&
(BlockWaitcntProcessedSet.find(&MBB) ==
BlockWaitcntProcessedSet.end())) {
BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true);
DEBUG(dbgs() << "set-revisit: block"
- << ContainingLoop->getTopBlock()->getNumber() << '\n';);
+ << ContainingLoop->getHeader()->getNumber() << '\n';);
}
// Walk over the instructions.
@@ -1774,7 +1774,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
// See if we want to revisit the loop.
if (ContainingLoop && loopBottom(ContainingLoop) == &MBB) {
- MachineBasicBlock *EntryBB = ContainingLoop->getTopBlock();
+ MachineBasicBlock *EntryBB = ContainingLoop->getHeader();
BlockWaitcntBrackets *EntrySB = BlockWaitcntBracketsMap[EntryBB].get();
if (EntrySB && EntrySB->getRevisitLoop()) {
EntrySB->setRevisitLoop(false);
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index b6a982aee6be..adebb8c4a1c5 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -122,9 +122,15 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
bool HasStackObjects = FrameInfo.hasStackObjects();
- if (HasStackObjects || MaySpill)
+ if (HasStackObjects || MaySpill) {
PrivateSegmentWaveByteOffset = true;
+ // HS and GS always have the scratch wave offset in SGPR5 on GFX9.
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 &&
+ (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS))
+ PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5;
+ }
+
if (ST.isAmdCodeObjectV2(MF)) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;