diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-08 17:12:57 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-08 17:12:57 +0000 |
| commit | c46e6a5940c50058e00c0c5f9123fd82e338d29a (patch) | |
| tree | 89a719d723035c54a190b1f81d329834f1f93336 /lib/Target/AMDGPU | |
| parent | 148779df305667b6942fee7e758fdf81a6498f38 (diff) | |
Notes
Diffstat (limited to 'lib/Target/AMDGPU')
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 | ||||
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 | ||||
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 87 | ||||
| -rw-r--r-- | lib/Target/AMDGPU/AMDGPURegisterBankInfo.h | 5 | ||||
| -rw-r--r-- | lib/Target/AMDGPU/SIFrameLowering.cpp | 3 | ||||
| -rw-r--r-- | lib/Target/AMDGPU/SIISelLowering.cpp | 14 | ||||
| -rw-r--r-- | lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 10 | ||||
| -rw-r--r-- | lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 8 |
8 files changed, 75 insertions, 57 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 2ce23dbf08e6..f473944cd528 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -713,7 +713,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) | S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) | S_00B84C_EXCP_EN_MSB(0) | - S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) | + // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP. + S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) | S_00B84C_EXCP_EN(0); } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 64e1b8f0d7f0..915d1d9e0e68 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3580,7 +3580,7 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { - Known.Zero.clearAllBits(); Known.One.clearAllBits(); // Don't know anything. + Known.resetAll(); // Don't know anything. KnownBits Known2; unsigned Opc = Op.getOpcode(); diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index a5edc0c3b937..623b2c88ab8f 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -82,25 +82,28 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( switch (MI.getOpcode()) { case TargetOpcode::G_LOAD: { // FIXME: Should we be hard coding the size for these mappings? - InstructionMapping SSMapping(1, 1, - getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), - 2); // Num Operands - AltMappings.emplace_back(std::move(SSMapping)); + const InstructionMapping &SSMapping = getInstructionMapping( + 1, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), + 2); // Num Operands + AltMappings.push_back(&SSMapping); - InstructionMapping VVMapping(2, 1, - getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}), - 2); // Num Operands - AltMappings.emplace_back(std::move(VVMapping)); + const InstructionMapping &VVMapping = getInstructionMapping( + 2, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64)}), + 2); // Num Operands + AltMappings.push_back(&VVMapping); // FIXME: Should this be the pointer-size (64-bits) or the size of the // register that will hold the bufffer resourc (128-bits). - InstructionMapping VSMapping(3, 1, - getOperandsMapping({AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), - AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), - 2); // Num Operands - AltMappings.emplace_back(std::move(VSMapping)); + const InstructionMapping &VSMapping = getInstructionMapping( + 3, 1, getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64)}), + 2); // Num Operands + AltMappings.push_back(&VSMapping); return AltMappings; @@ -124,13 +127,11 @@ static bool isInstrUniform(const MachineInstr &MI) { return AMDGPU::isUniformMMO(MMO); } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - RegisterBankInfo::InstructionMapping Mapping = - InstructionMapping{1, 1, nullptr, MI.getNumOperands()}; SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands()); unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); unsigned PtrSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI); @@ -150,32 +151,34 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { OpdsMapping[0] = ValMapping; OpdsMapping[1] = PtrMapping; - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); + const RegisterBankInfo::InstructionMapping &Mapping = getInstructionMapping( + 1, 1, getOperandsMapping(OpdsMapping), MI.getNumOperands()); return Mapping; // FIXME: Do we want to add a mapping for FLAT load, or should we just // handle that during instruction selection? } -RegisterBankInfo::InstructionMapping +const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { - RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI); + const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()}; SmallVector<const ValueMapping*, 8> OpdsMapping(MI.getNumOperands()); + bool IsComplete = true; switch (MI.getOpcode()) { - default: break; + default: + IsComplete = false; + break; case AMDGPU::G_CONSTANT: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; + break; } case AMDGPU::G_GEP: { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { @@ -185,8 +188,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits(); OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); } - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; + break; } case AMDGPU::G_STORE: { assert(MI.getOperand(0).isReg()); @@ -203,28 +205,27 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = ValMapping; OpdsMapping[1] = PtrMapping; - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - return Mapping; + break; } case AMDGPU::G_LOAD: return getInstrMappingForLoad(MI); } - unsigned BankID = AMDGPU::SGPRRegBankID; + if (!IsComplete) { + unsigned BankID = AMDGPU::SGPRRegBankID; - Mapping = InstructionMapping{1, 1, nullptr, MI.getNumOperands()}; - unsigned Size = 0; - for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) { - // If the operand is not a register default to the size of the previous - // operand. - // FIXME: Can't we pull the types from the MachineInstr rather than the - // operands. - if (MI.getOperand(Idx).isReg()) - Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI); - OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size)); + unsigned Size = 0; + for (unsigned Idx = 0; Idx < MI.getNumOperands(); ++Idx) { + // If the operand is not a register default to the size of the previous + // operand. + // FIXME: Can't we pull the types from the MachineInstr rather than the + // operands. + if (MI.getOperand(Idx).isReg()) + Size = getSizeInBits(MI.getOperand(Idx).getReg(), MRI, *TRI); + OpdsMapping.push_back(AMDGPU::getValueMapping(BankID, Size)); + } } - Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping)); - - return Mapping; + return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), + MI.getNumOperands()); } diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h index f13bde87ef2d..7c198a1b8a3f 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h @@ -44,7 +44,7 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo { /// See RegisterBankInfo::applyMapping. void applyMappingImpl(const OperandsMapper &OpdMapper) const override; - RegisterBankInfo::InstructionMapping + const RegisterBankInfo::InstructionMapping & getInstrMappingForLoad(const MachineInstr &MI) const; public: @@ -59,7 +59,8 @@ public: InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override; - InstructionMapping getInstrMapping(const MachineInstr &MI) const override; + const InstructionMapping & + getInstrMapping(const MachineInstr &MI) const override; }; } // End llvm namespace. #endif diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 86e3b37b09e9..1279f845de0e 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -353,7 +353,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, if (OffsetRegUsed && PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) { BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) - .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill); + .addReg(PreloadedScratchWaveOffsetReg, + MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill); } if (CopyBuffer && !CopyBufferFirst) { diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 853c8737b464..cc93c27731ff 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1042,6 +1042,7 @@ static void allocateHSAUserSGPRs(CCState &CCInfo, static void allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF, SIMachineFunctionInfo &Info, + CallingConv::ID CallConv, bool IsShader) { if (Info.hasWorkGroupIDX()) { unsigned Reg = Info.addWorkGroupIDX(); @@ -1072,8 +1073,15 @@ static void allocateSystemSGPRs(CCState &CCInfo, unsigned PrivateSegmentWaveByteOffsetReg; if (IsShader) { - PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo); - Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg); + PrivateSegmentWaveByteOffsetReg = + Info.getPrivateSegmentWaveByteOffsetSystemSGPR(); + + // This is true if the scratch wave byte offset doesn't have a fixed + // location. + if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) { + PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo); + Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg); + } } else PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset(); @@ -1310,7 +1318,7 @@ SDValue SITargetLowering::LowerFormalArguments( // Start adding system SGPRs. if (IsEntryFunc) - allocateSystemSGPRs(CCInfo, MF, *Info, IsShader); + allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsShader); reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info); diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 9122cd72d323..b5e3ce3dfe3e 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1087,7 +1087,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( (CntVal[LGKM_CNT] & AMDGPU::getLgkmcntBitMask(IV)))) { MachineLoop *ContainingLoop = MLI->getLoopFor(MI.getParent()); if (ContainingLoop) { - MachineBasicBlock *TBB = ContainingLoop->getTopBlock(); + MachineBasicBlock *TBB = ContainingLoop->getHeader(); BlockWaitcntBrackets *ScoreBracket = BlockWaitcntBracketsMap[TBB].get(); if (!ScoreBracket) { @@ -1097,7 +1097,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( } ScoreBracket->setRevisitLoop(true); DEBUG(dbgs() << "set-revisit: block" - << ContainingLoop->getTopBlock()->getNumber() << '\n';); + << ContainingLoop->getHeader()->getNumber() << '\n';); } } @@ -1758,12 +1758,12 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { // If we are walking into the block from before the loop, then guarantee // at least 1 re-walk over the loop to propagate the information, even if // no S_WAITCNT instructions were generated. - if (ContainingLoop && ContainingLoop->getTopBlock() == &MBB && J < I && + if (ContainingLoop && ContainingLoop->getHeader() == &MBB && J < I && (BlockWaitcntProcessedSet.find(&MBB) == BlockWaitcntProcessedSet.end())) { BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true); DEBUG(dbgs() << "set-revisit: block" - << ContainingLoop->getTopBlock()->getNumber() << '\n';); + << ContainingLoop->getHeader()->getNumber() << '\n';); } // Walk over the instructions. @@ -1774,7 +1774,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { // See if we want to revisit the loop. if (ContainingLoop && loopBottom(ContainingLoop) == &MBB) { - MachineBasicBlock *EntryBB = ContainingLoop->getTopBlock(); + MachineBasicBlock *EntryBB = ContainingLoop->getHeader(); BlockWaitcntBrackets *EntrySB = BlockWaitcntBracketsMap[EntryBB].get(); if (EntrySB && EntrySB->getRevisitLoop()) { EntrySB->setRevisitLoop(false); diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index b6a982aee6be..adebb8c4a1c5 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -122,9 +122,15 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) bool MaySpill = ST.isVGPRSpillingEnabled(*F); bool HasStackObjects = FrameInfo.hasStackObjects(); - if (HasStackObjects || MaySpill) + if (HasStackObjects || MaySpill) { PrivateSegmentWaveByteOffset = true; + // HS and GS always have the scratch wave offset in SGPR5 on GFX9. + if (ST.getGeneration() >= AMDGPUSubtarget::GFX9 && + (CC == CallingConv::AMDGPU_HS || CC == CallingConv::AMDGPU_GS)) + PrivateSegmentWaveByteOffsetSystemSGPR = AMDGPU::SGPR5; + } + if (ST.isAmdCodeObjectV2(MF)) { if (HasStackObjects || MaySpill) PrivateSegmentBuffer = true; |
