diff options
Diffstat (limited to 'lib/Target/AMDGPU/SIInsertWaitcnts.cpp')
| -rw-r--r-- | lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 249 |
1 files changed, 110 insertions, 139 deletions
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 0f009a48754ad..6bbe5979316da 100644 --- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1,4 +1,4 @@ -//===-- SIInsertWaitcnts.cpp - Insert Wait Instructions --------------------===/ +//===- SIInsertWaitcnts.cpp - Insert Wait Instructions --------------------===// // // The LLVM Compiler Infrastructure // @@ -21,12 +21,34 @@ #include "SIDefines.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <memory> +#include <utility> +#include <vector> #define DEBUG_TYPE "si-insert-waitcnts" @@ -42,7 +64,7 @@ namespace { enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, NUM_INST_CNTS }; -typedef std::pair<signed, signed> RegInterval; +using RegInterval = std::pair<signed, signed>; struct { int32_t VmcntMax; @@ -101,6 +123,15 @@ enum RegisterMapping { // "s_waitcnt 0" before use. class BlockWaitcntBrackets { public: + BlockWaitcntBrackets() { + for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; + T = (enum InstCounterType)(T + 1)) { + memset(VgprScores[T], 0, sizeof(VgprScores[T])); + } + } + + ~BlockWaitcntBrackets() = default; + static int32_t getWaitCountMax(InstCounterType T) { switch (T) { case VM_CNT: @@ -113,14 +144,14 @@ public: break; } return 0; - }; + } void setScoreLB(InstCounterType T, int32_t Val) { assert(T < NUM_INST_CNTS); if (T >= NUM_INST_CNTS) return; ScoreLBs[T] = Val; - }; + } void setScoreUB(InstCounterType T, int32_t Val) { assert(T < NUM_INST_CNTS); @@ -132,21 +163,21 @@ public: if (ScoreLBs[T] < UB) ScoreLBs[T] = UB; } - }; + } int32_t getScoreLB(InstCounterType T) { assert(T < NUM_INST_CNTS); if (T >= NUM_INST_CNTS) return 0; return ScoreLBs[T]; - }; + } int32_t getScoreUB(InstCounterType T) { assert(T < NUM_INST_CNTS); if (T >= NUM_INST_CNTS) return 0; return ScoreUBs[T]; - }; + } // Mapping from event to counter. InstCounterType eventCounter(WaitEventType E) { @@ -218,26 +249,18 @@ public: void setEventUB(enum WaitEventType W, int32_t Val) { EventUBs[W] = Val; } int32_t getMaxVGPR() const { return VgprUB; } int32_t getMaxSGPR() const { return SgprUB; } + int32_t getEventUB(enum WaitEventType W) const { assert(W < NUM_WAIT_EVENTS); return EventUBs[W]; } + bool counterOutOfOrder(InstCounterType T); unsigned int updateByWait(InstCounterType T, int ScoreToWait); void updateByEvent(const SIInstrInfo *TII, const SIRegisterInfo *TRI, const MachineRegisterInfo *MRI, WaitEventType E, MachineInstr &MI); - BlockWaitcntBrackets() - : WaitAtBeginning(false), RevisitLoop(false), ValidLoop(false), MixedExpTypes(false), - LoopRegion(NULL), PostOrder(0), Waitcnt(NULL), VgprUB(0), SgprUB(0) { - for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; - T = (enum InstCounterType)(T + 1)) { - memset(VgprScores[T], 0, sizeof(VgprScores[T])); - } - } - ~BlockWaitcntBrackets(){}; - bool hasPendingSMEM() const { return (EventUBs[SMEM_ACCESS] > ScoreLBs[LGKM_CNT] && EventUBs[SMEM_ACCESS] <= ScoreUBs[LGKM_CNT]); @@ -266,7 +289,7 @@ public: int32_t getPostOrder() const { return PostOrder; } void setWaitcnt(MachineInstr *WaitcntIn) { Waitcnt = WaitcntIn; } - void clearWaitcnt() { Waitcnt = NULL; } + void clearWaitcnt() { Waitcnt = nullptr; } MachineInstr *getWaitcnt() const { return Waitcnt; } bool mixedExpTypes() const { return MixedExpTypes; } @@ -278,13 +301,11 @@ public: void dump() { print(dbgs()); } private: - bool WaitAtBeginning; - bool RevisitLoop; - bool ValidLoop; - bool MixedExpTypes; - MachineLoop *LoopRegion; - int32_t PostOrder; - MachineInstr *Waitcnt; + bool WaitAtBeginning = false; + bool RevisitLoop = false; + bool MixedExpTypes = false; + int32_t PostOrder = 0; + MachineInstr *Waitcnt = nullptr; int32_t ScoreLBs[NUM_INST_CNTS] = {0}; int32_t ScoreUBs[NUM_INST_CNTS] = {0}; int32_t EventUBs[NUM_WAIT_EVENTS] = {0}; @@ -292,8 +313,8 @@ private: int32_t LastFlat[NUM_INST_CNTS] = {0}; // wait_cnt scores for every vgpr. // Keep track of the VgprUB and SgprUB to make merge at join efficient. - int32_t VgprUB; - int32_t SgprUB; + int32_t VgprUB = 0; + int32_t SgprUB = 0; int32_t VgprScores[NUM_INST_CNTS][NUM_ALL_VGPRS]; // Wait cnt scores for every sgpr, only lgkmcnt is relevant. int32_t SgprScores[SQ_MAX_PGM_SGPRS] = {0}; @@ -306,38 +327,36 @@ private: // at the end of the loop footer. class LoopWaitcntData { public: + LoopWaitcntData() = default; + ~LoopWaitcntData() = default; + void incIterCnt() { IterCnt++; } void resetIterCnt() { IterCnt = 0; } int32_t getIterCnt() { return IterCnt; } - LoopWaitcntData() : LfWaitcnt(NULL), IterCnt(0) {} - ~LoopWaitcntData(){}; - void setWaitcnt(MachineInstr *WaitcntIn) { LfWaitcnt = WaitcntIn; } MachineInstr *getWaitcnt() const { return LfWaitcnt; } void print() { DEBUG(dbgs() << " iteration " << IterCnt << '\n';); - return; } private: // s_waitcnt added at the end of loop footer to stablize wait scores // at the end of the loop footer. - MachineInstr *LfWaitcnt; + MachineInstr *LfWaitcnt = nullptr; // Number of iterations the loop has been visited, not including the initial // walk over. - int32_t IterCnt; + int32_t IterCnt = 0; }; class SIInsertWaitcnts : public MachineFunctionPass { - private: - const SISubtarget *ST; - const SIInstrInfo *TII; - const SIRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - const MachineLoopInfo *MLI; + const SISubtarget *ST = nullptr; + const SIInstrInfo *TII = nullptr; + const SIRegisterInfo *TRI = nullptr; + const MachineRegisterInfo *MRI = nullptr; + const MachineLoopInfo *MLI = nullptr; AMDGPU::IsaInfo::IsaVersion IV; AMDGPUAS AMDGPUASI; @@ -357,9 +376,7 @@ private: public: static char ID; - SIInsertWaitcnts() - : MachineFunctionPass(ID), ST(nullptr), TII(nullptr), TRI(nullptr), - MRI(nullptr), MLI(nullptr) {} + SIInsertWaitcnts() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -376,9 +393,11 @@ public: void addKillWaitBracket(BlockWaitcntBrackets *Bracket) { // The waitcnt information is copied because it changes as the block is // traversed. - KillWaitBrackets.push_back(make_unique<BlockWaitcntBrackets>(*Bracket)); + KillWaitBrackets.push_back( + llvm::make_unique<BlockWaitcntBrackets>(*Bracket)); } + bool mayAccessLDSThroughFlat(const MachineInstr &MI) const; MachineInstr *generateSWaitCntInstBefore(MachineInstr &MI, BlockWaitcntBrackets *ScoreBrackets); void updateEventWaitCntAfter(MachineInstr &Inst, @@ -389,7 +408,7 @@ public: void insertWaitcntBeforeCF(MachineBasicBlock &Block, MachineInstr *Inst); }; -} // End anonymous namespace. +} // end anonymous namespace RegInterval BlockWaitcntBrackets::getRegInterval(const MachineInstr *MI, const SIInstrInfo *TII, @@ -567,13 +586,13 @@ void BlockWaitcntBrackets::updateByEvent(const SIInstrInfo *TII, } #if 0 // TODO: check if this is handled by MUBUF code above. } else if (Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORD || - Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX2 || - Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX4) { + Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX2 || + Inst.getOpcode() == AMDGPU::BUFFER_STORE_DWORDX4) { MachineOperand *MO = TII->getNamedOperand(Inst, AMDGPU::OpName::data); unsigned OpNo;//TODO: find the OpNo for this operand; RegInterval Interval = getRegInterval(&Inst, TII, MRI, TRI, OpNo, false); for (signed RegNo = Interval.first; RegNo < Interval.second; - ++RegNo) { + ++RegNo) { setRegScore(RegNo + NUM_ALL_VGPRS, t, CurrScore); } #endif @@ -642,7 +661,6 @@ void BlockWaitcntBrackets::print(raw_ostream &OS) { OS << '\n'; } OS << '\n'; - return; } unsigned int BlockWaitcntBrackets::updateByWait(InstCounterType T, @@ -860,7 +878,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( switch (src_type) { case SCMEM_LDS: if (group_is_multi_wave || - context->OptFlagIsOn(OPT_R1100_LDSMEM_FENCE_CHICKEN_BIT)) { + context->OptFlagIsOn(OPT_R1100_LDSMEM_FENCE_CHICKEN_BIT)) { EmitSwaitcnt |= ScoreBrackets->updateByWait(LGKM_CNT, ScoreBrackets->getScoreUB(LGKM_CNT)); // LDS may have to wait for VM_CNT after buffer load to LDS @@ -874,9 +892,9 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( case SCMEM_GDS: if (group_is_multi_wave || fence_is_global) { EmitSwaitcnt |= ScoreBrackets->updateByWait(EXP_CNT, - ScoreBrackets->getScoreUB(EXP_CNT)); + ScoreBrackets->getScoreUB(EXP_CNT)); EmitSwaitcnt |= ScoreBrackets->updateByWait(LGKM_CNT, - ScoreBrackets->getScoreUB(LGKM_CNT)); + ScoreBrackets->getScoreUB(LGKM_CNT)); } break; @@ -886,9 +904,9 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( case SCMEM_SCATTER: if (group_is_multi_wave || fence_is_global) { EmitSwaitcnt |= ScoreBrackets->updateByWait(EXP_CNT, - ScoreBrackets->getScoreUB(EXP_CNT)); + ScoreBrackets->getScoreUB(EXP_CNT)); EmitSwaitcnt |= ScoreBrackets->updateByWait(VM_CNT, - ScoreBrackets->getScoreUB(VM_CNT)); + ScoreBrackets->getScoreUB(VM_CNT)); } break; @@ -927,13 +945,14 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( // before the call. if (MI.getOpcode() == SC_CALL) { if (ScoreBrackets->getScoreUB(EXP_CNT) > - ScoreBrackets->getScoreLB(EXP_CNT)) { + ScoreBrackets->getScoreLB(EXP_CNT)) { ScoreBrackets->setScoreLB(EXP_CNT, ScoreBrackets->getScoreUB(EXP_CNT)); EmitSwaitcnt |= CNT_MASK(EXP_CNT); } } #endif + // FIXME: Should not be relying on memoperands. // Look at the source operands of every instruction to see if // any of them results from a previous memory operation that affects // its current usage. If so, an s_waitcnt instruction needs to be @@ -949,6 +968,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( EmitSwaitcnt |= ScoreBrackets->updateByWait( VM_CNT, ScoreBrackets->getRegScore(RegNo, VM_CNT)); } + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { const MachineOperand &Op = MI.getOperand(I); const MachineRegisterInfo &MRIA = *MRI; @@ -973,6 +993,7 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( // 2) If a destination operand that was used by a recent export/store ins, // add s_waitcnt on exp_cnt to guarantee the WAR order. if (MI.mayStore()) { + // FIXME: Should not be relying on memoperands. for (const MachineMemOperand *Memop : MI.memoperands()) { unsigned AS = Memop->getAddrSpace(); if (AS != AMDGPUASI.LOCAL_ADDRESS) @@ -1094,7 +1115,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore( BlockWaitcntBracketsMap[TBB].get(); if (!ScoreBracket) { assert(BlockVisitedSet.find(TBB) == BlockVisitedSet.end()); - BlockWaitcntBracketsMap[TBB] = make_unique<BlockWaitcntBrackets>(); + BlockWaitcntBracketsMap[TBB] = + llvm::make_unique<BlockWaitcntBrackets>(); ScoreBracket = BlockWaitcntBracketsMap[TBB].get(); } ScoreBracket->setRevisitLoop(true); @@ -1141,8 +1163,21 @@ void SIInsertWaitcnts::insertWaitcntBeforeCF(MachineBasicBlock &MBB, } else { MBB.push_back(Waitcnt); } +} + +// This is a flat memory operation. Check to see if it has memory +// tokens for both LDS and Memory, and if so mark it as a flat. +bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const { + if (MI.memoperands_empty()) + return true; + + for (const MachineMemOperand *Memop : MI.memoperands()) { + unsigned AS = Memop->getAddrSpace(); + if (AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) + return true; + } - return; + return false; } void SIInsertWaitcnts::updateEventWaitCntAfter( @@ -1151,10 +1186,8 @@ void SIInsertWaitcnts::updateEventWaitCntAfter( // instruction, update the upper-bound of the appropriate counter's // bracket and the destination operand scores. // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere. - uint64_t TSFlags = Inst.getDesc().TSFlags; - if (TII->isDS(Inst) && (TSFlags & SIInstrFlags::LGKM_CNT)) { - if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds) && - TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) { + if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) { + if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) { ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst); ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst); } else { @@ -1162,23 +1195,18 @@ void SIInsertWaitcnts::updateEventWaitCntAfter( } } else if (TII->isFLAT(Inst)) { assert(Inst.mayLoad() || Inst.mayStore()); - ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst); - ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst); - // This is a flat memory operation. Check to see if it has memory - // tokens for both LDS and Memory, and if so mark it as a flat. - bool FoundLDSMem = false; - for (const MachineMemOperand *Memop : Inst.memoperands()) { - unsigned AS = Memop->getAddrSpace(); - if (AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) - FoundLDSMem = true; - } + if (TII->usesVM_CNT(Inst)) + ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst); + + if (TII->usesLGKM_CNT(Inst)) { + ScoreBrackets->updateByEvent(TII, TRI, MRI, LDS_ACCESS, Inst); - // This is a flat memory operation, so note it - it will require - // that both the VM and LGKM be flushed to zero if it is pending when - // a VM or LGKM dependency occurs. - if (FoundLDSMem) { - ScoreBrackets->setPendingFlat(); + // This is a flat memory operation, so note it - it will require + // that both the VM and LGKM be flushed to zero if it is pending when + // a VM or LGKM dependency occurs. + if (mayAccessLDSThroughFlat(Inst)) + ScoreBrackets->setPendingFlat(); } } else if (SIInstrInfo::isVMEM(Inst) && // TODO: get a better carve out. @@ -1241,7 +1269,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) { BlockWaitcntBracketsMap[pred].get(); bool Visited = BlockVisitedSet.find(pred) != BlockVisitedSet.end(); if (!Visited || PredScoreBrackets->getWaitAtBeginning()) { - break; + continue; } for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS; T = (enum InstCounterType)(T + 1)) { @@ -1280,7 +1308,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) { BlockWaitcntBracketsMap[Pred].get(); bool Visited = BlockVisitedSet.find(Pred) != BlockVisitedSet.end(); if (!Visited || PredScoreBrackets->getWaitAtBeginning()) { - break; + continue; } int GDSSpan = PredScoreBrackets->getEventUB(GDS_GPR_LOCK) - @@ -1327,7 +1355,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) { // Set the register scoreboard. for (MachineBasicBlock *Pred : Block.predecessors()) { if (BlockVisitedSet.find(Pred) == BlockVisitedSet.end()) { - break; + continue; } BlockWaitcntBrackets *PredScoreBrackets = @@ -1441,7 +1469,7 @@ void SIInsertWaitcnts::mergeInputScoreBrackets(MachineBasicBlock &Block) { // the delayed nature of these operations. for (MachineBasicBlock *Pred : Block.predecessors()) { if (BlockVisitedSet.find(Pred) == BlockVisitedSet.end()) { - break; + continue; } BlockWaitcntBrackets *PredScoreBrackets = @@ -1494,8 +1522,6 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, ScoreBrackets->dump(); }); - bool InsertNOP = false; - // Walk over the instructions. for (MachineBasicBlock::iterator Iter = Block.begin(), E = Block.end(); Iter != E;) { @@ -1555,7 +1581,7 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, if (RequireCheckResourceType(Inst, context)) { // Force the score to as if an S_WAITCNT vmcnt(0) is emitted. ScoreBrackets->setScoreLB(VM_CNT, - ScoreBrackets->getScoreUB(VM_CNT)); + ScoreBrackets->getScoreUB(VM_CNT)); } #endif @@ -1596,58 +1622,6 @@ void SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, VCCZBugHandledSet.insert(&Inst); } - if (ST->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { - - // This avoids a s_nop after a waitcnt has just been inserted. - if (!SWaitInst && InsertNOP) { - BuildMI(Block, Inst, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0); - } - InsertNOP = false; - - // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM - // or SMEM clause, respectively. - // - // The temporary workaround is to break the clauses with S_NOP. - // - // The proper solution would be to allocate registers such that all source - // and destination registers don't overlap, e.g. this is illegal: - // r0 = load r2 - // r2 = load r0 - bool IsSMEM = false; - bool IsVMEM = false; - if (TII->isSMRD(Inst)) - IsSMEM = true; - else if (TII->usesVM_CNT(Inst)) - IsVMEM = true; - - ++Iter; - if (Iter == E) - break; - - MachineInstr &Next = *Iter; - - // TODO: How about consecutive SMEM instructions? - // The comments above says break the clause but the code does not. - // if ((TII->isSMRD(next) && isSMEM) || - if (!IsSMEM && TII->usesVM_CNT(Next) && IsVMEM && - // TODO: Enable this check when hasSoftClause is upstreamed. - // ST->hasSoftClauses() && - ST->isXNACKEnabled()) { - // Insert a NOP to break the clause. - InsertNOP = true; - continue; - } - - // There must be "S_NOP 0" between an instruction writing M0 and - // S_SENDMSG. - if ((Next.getOpcode() == AMDGPU::S_SENDMSG || - Next.getOpcode() == AMDGPU::S_SENDMSGHALT) && - Inst.definesRegister(AMDGPU::M0)) - InsertNOP = true; - - continue; - } - ++Iter; } @@ -1752,13 +1726,13 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { BlockWaitcntBrackets *ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get(); if (!ScoreBrackets) { - BlockWaitcntBracketsMap[&MBB] = make_unique<BlockWaitcntBrackets>(); + BlockWaitcntBracketsMap[&MBB] = llvm::make_unique<BlockWaitcntBrackets>(); ScoreBrackets = BlockWaitcntBracketsMap[&MBB].get(); } ScoreBrackets->setPostOrder(MBB.getNumber()); MachineLoop *ContainingLoop = MLI->getLoopFor(&MBB); if (ContainingLoop && LoopWaitcntDataMap[ContainingLoop] == nullptr) - LoopWaitcntDataMap[ContainingLoop] = make_unique<LoopWaitcntData>(); + LoopWaitcntDataMap[ContainingLoop] = llvm::make_unique<LoopWaitcntData>(); // If we are walking into the block from before the loop, then guarantee // at least 1 re-walk over the loop to propagate the information, even if @@ -1819,12 +1793,10 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; ++BI) { - MachineBasicBlock &MBB = *BI; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { - if (!HaveScalarStores && TII->isScalarStore(*I)) HaveScalarStores = true; @@ -1847,7 +1819,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - if (I->getOpcode() == AMDGPU::S_DCACHE_WB) SeenDCacheWB = true; else if (TII->isScalarStore(*I)) |
