diff options
Diffstat (limited to 'lib/Target/AMDGPU/R600MachineScheduler.cpp')
-rw-r--r-- | lib/Target/AMDGPU/R600MachineScheduler.cpp | 136 |
1 files changed, 67 insertions, 69 deletions
diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index bcde5fb50dac9..db18e5bd1afae 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" +#include "R600InstrInfo.h" #include "AMDGPUSubtarget.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Pass.h" @@ -26,7 +27,7 @@ using namespace llvm; void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness"); DAG = static_cast<ScheduleDAGMILive*>(dag); - const AMDGPUSubtarget &ST = DAG->MF.getSubtarget<AMDGPUSubtarget>(); + const R600Subtarget &ST = DAG->MF.getSubtarget<R600Subtarget>(); TII = static_cast<const R600InstrInfo*>(DAG->TII); TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); VLIW5 = !ST.hasCaymanISA(); @@ -48,8 +49,7 @@ void R600SchedStrategy::MoveUnits(std::vector<SUnit *> &QSrc, QSrc.clear(); } -static -unsigned getWFCountLimitedByGPR(unsigned GPRCount) { +static unsigned getWFCountLimitedByGPR(unsigned GPRCount) { assert (GPRCount && "GPRCount cannot be 0"); return 248 / GPRCount; } @@ -222,75 +222,74 @@ bool R600SchedStrategy::regBelongsToClass(unsigned Reg, R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const { MachineInstr *MI = SU->getInstr(); - if (TII->isTransOnly(MI)) + if (TII->isTransOnly(*MI)) return AluTrans; - switch (MI->getOpcode()) { - case AMDGPU::PRED_X: - return AluPredX; - case AMDGPU::INTERP_PAIR_XY: - case AMDGPU::INTERP_PAIR_ZW: - case AMDGPU::INTERP_VEC_LOAD: - case AMDGPU::DOT_4: - return AluT_XYZW; - case AMDGPU::COPY: - if (MI->getOperand(1).isUndef()) { - // MI will become a KILL, don't considers it in scheduling - return AluDiscarded; - } - default: - break; - } - - // Does the instruction take a whole IG ? - // XXX: Is it possible to add a helper function in R600InstrInfo that can - // be used here and in R600PacketizerList::isSoloInstruction() ? - if(TII->isVector(*MI) || - TII->isCubeOp(MI->getOpcode()) || - TII->isReductionOp(MI->getOpcode()) || - MI->getOpcode() == AMDGPU::GROUP_BARRIER) { - return AluT_XYZW; + switch (MI->getOpcode()) { + case AMDGPU::PRED_X: + return AluPredX; + case AMDGPU::INTERP_PAIR_XY: + case AMDGPU::INTERP_PAIR_ZW: + case AMDGPU::INTERP_VEC_LOAD: + case AMDGPU::DOT_4: + return AluT_XYZW; + case AMDGPU::COPY: + if (MI->getOperand(1).isUndef()) { + // MI will become a KILL, don't considers it in scheduling + return AluDiscarded; } + default: + break; + } - if (TII->isLDSInstr(MI->getOpcode())) { - return AluT_X; - } + // Does the instruction take a whole IG ? + // XXX: Is it possible to add a helper function in R600InstrInfo that can + // be used here and in R600PacketizerList::isSoloInstruction() ? + if(TII->isVector(*MI) || + TII->isCubeOp(MI->getOpcode()) || + TII->isReductionOp(MI->getOpcode()) || + MI->getOpcode() == AMDGPU::GROUP_BARRIER) { + return AluT_XYZW; + } - // Is the result already assigned to a channel ? - unsigned DestSubReg = MI->getOperand(0).getSubReg(); - switch (DestSubReg) { - case AMDGPU::sub0: - return AluT_X; - case AMDGPU::sub1: - return AluT_Y; - case AMDGPU::sub2: - return AluT_Z; - case AMDGPU::sub3: - return AluT_W; - default: - break; - } + if (TII->isLDSInstr(MI->getOpcode())) { + return AluT_X; + } - // Is the result already member of a X/Y/Z/W class ? - unsigned DestReg = MI->getOperand(0).getReg(); - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) || - regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass)) - return AluT_X; - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass)) - return AluT_Y; - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass)) - return AluT_Z; - if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass)) - return AluT_W; - if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) - return AluT_XYZW; - - // LDS src registers cannot be used in the Trans slot. - if (TII->readsLDSSrcReg(MI)) - return AluT_XYZW; - - return AluAny; + // Is the result already assigned to a channel ? + unsigned DestSubReg = MI->getOperand(0).getSubReg(); + switch (DestSubReg) { + case AMDGPU::sub0: + return AluT_X; + case AMDGPU::sub1: + return AluT_Y; + case AMDGPU::sub2: + return AluT_Z; + case AMDGPU::sub3: + return AluT_W; + default: + break; + } + // Is the result already member of a X/Y/Z/W class ? + unsigned DestReg = MI->getOperand(0).getReg(); + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) || + regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass)) + return AluT_X; + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass)) + return AluT_Y; + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass)) + return AluT_Z; + if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass)) + return AluT_W; + if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass)) + return AluT_XYZW; + + // LDS src registers cannot be used in the Trans slot. + if (TII->readsLDSSrcReg(*MI)) + return AluT_XYZW; + + return AluAny; } int R600SchedStrategy::getInstKind(SUnit* SU) { @@ -324,9 +323,8 @@ SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) { It != E; ++It) { SUnit *SU = *It; InstructionsGroupCandidate.push_back(SU->getInstr()); - if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) - && (!AnyALU || !TII->isVectorOnly(SU->getInstr())) - ) { + if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) && + (!AnyALU || !TII->isVectorOnly(*SU->getInstr()))) { InstructionsGroupCandidate.pop_back(); Q.erase((It + 1).base()); return SU; @@ -350,7 +348,7 @@ void R600SchedStrategy::PrepareNextSlot() { DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; -// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS) +// if (HwGen == R600Subtarget::NORTHERN_ISLANDS) // OccupedSlotsMask |= 16; InstructionsGroupCandidate.clear(); LoadAlu(); |