From 0b57cec536236d46e3dba9bd041533462f33dbb7 Mon Sep 17 00:00:00 2001
From: Dimitry Andric <dim@FreeBSD.org>
Date: Fri, 20 Dec 2019 19:53:05 +0000
Subject: Move all sources from the llvm project into contrib/llvm-project.

This uses the new layout of the upstream repository, which was recently
migrated to GitHub, and converted into a "monorepo".  That is, most of
the earlier separate sub-projects with their own branches and tags were
consolidated into one top-level directory, and are now branched and
tagged together.  Updating the vendor area to match this layout is next.
---
 .../lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp | 722 +++++++++++++++++++++
 1 file changed, 722 insertions(+)
 create mode 100644 contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp

diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
new file mode 100644
index 000000000000..8098b81d1ea2
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -0,0 +1,722 @@
+//===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass turns all control flow pseudo instructions into native ones,
+/// computing their address on the fly; it also sets the STACK_SIZE info.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "R600Defines.h"
+#include "R600InstrInfo.h"
+#include "R600MachineFunctionInfo.h"
+#include "R600RegisterInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <set>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "r600cf"
+
+namespace {
+
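+// Models the hardware control-flow stack: loops and WQM branches consume
+// full stack entries, while other pushes consume fractional sub-entries.
+// The high-water mark is what ends up in the shader's STACK_SIZE.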
+struct CFStack {
+  enum StackItem {
+    ENTRY = 0,
+    SUB_ENTRY = 1,
+    FIRST_NON_WQM_PUSH = 2,
+    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
+  };
+
+  const R600Subtarget *ST;
+  std::vector<StackItem> BranchStack;
+  std::vector<StackItem> LoopStack;
+  unsigned MaxStackSize;
+  unsigned CurrentEntries = 0;
+  unsigned CurrentSubEntries = 0;
+
+  CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
+      // We need to reserve a stack entry for CALL_FS in vertex shaders.
+      MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
+
+  unsigned getLoopDepth();
+  bool branchStackContains(CFStack::StackItem);
+  bool requiresWorkAroundForInst(unsigned Opcode);
+  unsigned getSubEntrySize(CFStack::StackItem Item);
+  void updateMaxStackSize();
+  void pushBranch(unsigned Opcode, bool isWQM = false);
+  void pushLoop();
+  void popBranch();
+  void popLoop();
+};
+
+unsigned CFStack::getLoopDepth() {
+  return LoopStack.size();
+}
+
+bool CFStack::branchStackContains(CFStack::StackItem Item) {
+  for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
+       E = BranchStack.end(); I != E; ++I) {
+    if (*I == Item)
+      return true;
+  }
+  return false;
+}
+
+bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
+  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
+      getLoopDepth() > 1)
+    return true;
+
+  if (!ST->hasCFAluBug())
+    return false;
+
+  switch (Opcode) {
+  default: return false;
+  case R600::CF_ALU_PUSH_BEFORE:
+  case R600::CF_ALU_ELSE_AFTER:
+  case R600::CF_ALU_BREAK:
+  case R600::CF_ALU_CONTINUE:
+    if (CurrentSubEntries == 0)
+      return false;
+    if (ST->getWavefrontSize() == 64) {
+      // We are being conservative here.  We only require this work-around if
+      // CurrentSubEntries > 3 &&
+      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
+      //
+      // We have to be conservative, because we don't know for certain that
+      // our stack allocation algorithm for Evergreen/NI is correct.  Applying
+      // this work-around when CurrentSubEntries > 3 allows us to over-allocate
+      // stack resources without any problems.
+      return CurrentSubEntries > 3;
+    } else {
+      assert(ST->getWavefrontSize() == 32);
+      // We are being conservative here.  We only require the work-around if
+      // CurrentSubEntries > 7 &&
+      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
+      // See the comment on the wavefront size == 64 case for why we are
+      // being conservative.
+      return CurrentSubEntries > 7;
+    }
+  }
+}
+
+unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
+  switch (Item) {
+  default:
+    return 0;
+  case CFStack::FIRST_NON_WQM_PUSH:
+    assert(!ST->hasCaymanISA());
+    if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
+      // +1 For the push operation.
+      // +2 Extra space required.
+      return 3;
+    } else {
+      // Some documentation says that this is not necessary on Evergreen,
+      // but experimentation has shown that we need to allocate 1 extra
+      // sub-entry for the first non-WQM push.
+      // +1 For the push operation.
+      // +1 Extra space required.
+      return 2;
+    }
+  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
+    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+    // +1 For the push operation.
+    // +1 Extra space required.
+    return 2;
+  case CFStack::SUB_ENTRY:
+    return 1;
+  }
+}
+
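+// Four sub-entries share one full stack entry, so round the sub-entry count
+// up to a multiple of four before converting it to entries.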
+void CFStack::updateMaxStackSize() {
+  unsigned CurrentStackSize =
+      CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
+  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
+}
+
+void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
+  CFStack::StackItem Item = CFStack::ENTRY;
+  switch (Opcode) {
+  case R600::CF_PUSH_EG:
+  case R600::CF_ALU_PUSH_BEFORE:
+    if (!isWQM) {
+      if (!ST->hasCaymanISA() &&
+          !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
+        Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
+                                             // See comment in
+                                             // CFStack::getSubEntrySize()
+      else if (CurrentEntries > 0 &&
+               ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
+               !ST->hasCaymanISA() &&
+               !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
+        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
+      else
+        Item = CFStack::SUB_ENTRY;
+    } else
+      Item = CFStack::ENTRY;
+    break;
+  }
+  BranchStack.push_back(Item);
+  if (Item == CFStack::ENTRY)
+    CurrentEntries++;
+  else
+    CurrentSubEntries += getSubEntrySize(Item);
+  updateMaxStackSize();
+}
+
+void CFStack::pushLoop() {
+  LoopStack.push_back(CFStack::ENTRY);
+  CurrentEntries++;
+  updateMaxStackSize();
+}
+
+void CFStack::popBranch() {
+  CFStack::StackItem Top = BranchStack.back();
+  if (Top == CFStack::ENTRY)
+    CurrentEntries--;
+  else
+    CurrentSubEntries -= getSubEntrySize(Top);
+  BranchStack.pop_back();
+}
+
+void CFStack::popLoop() {
+  CurrentEntries--;
+  LoopStack.pop_back();
+}
+
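+// The pass itself: it groups fetch and ALU instructions into clauses and
+// replaces the control-flow pseudo instructions with native CF instructions,
+// whose jump addresses can be filled in once clause sizes are known.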
+class R600ControlFlowFinalizer : public MachineFunctionPass {
+private:
+  using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
+
+  enum ControlFlowInstruction {
+    CF_TC,
+    CF_VC,
+    CF_CALL_FS,
+    CF_WHILE_LOOP,
+    CF_END_LOOP,
+    CF_LOOP_BREAK,
+    CF_LOOP_CONTINUE,
+    CF_JUMP,
+    CF_ELSE,
+    CF_POP,
+    CF_END
+  };
+
+  const R600InstrInfo *TII = nullptr;
+  const R600RegisterInfo *TRI = nullptr;
+  unsigned MaxFetchInst;
+  const R600Subtarget *ST = nullptr;
+
+  bool IsTrivialInst(MachineInstr &MI) const {
+    switch (MI.getOpcode()) {
+    case R600::KILL:
+    case R600::RETURN:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
+    unsigned Opcode = 0;
+    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
+    switch (CFI) {
+    case CF_TC:
+      Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
+      break;
+    case CF_VC:
+      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
+      break;
+    case CF_CALL_FS:
+      Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
+      break;
+    case CF_WHILE_LOOP:
+      Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
+      break;
+    case CF_END_LOOP:
+      Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
+      break;
+    case CF_LOOP_BREAK:
+      Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
+      break;
+    case CF_LOOP_CONTINUE:
+      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
+      break;
+    case CF_JUMP:
+      Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
+      break;
+    case CF_ELSE:
+      Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
+      break;
+    case CF_POP:
+      Opcode = isEg ? R600::POP_EG : R600::POP_R600;
+      break;
+    case CF_END:
+      if (ST->hasCaymanISA()) {
+        Opcode = R600::CF_END_CM;
+        break;
+      }
+      Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
+      break;
+    }
+    assert(Opcode && "No opcode selected");
+    return TII->get(Opcode);
+  }
+
+  bool isCompatibleWithClause(const MachineInstr &MI,
+                              std::set<unsigned> &DstRegs) const {
+    unsigned DstMI, SrcMI;
+    for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
+                                          E = MI.operands_end();
+         I != E; ++I) {
+      const MachineOperand &MO = *I;
+      if (!MO.isReg())
+        continue;
+      if (MO.isDef()) {
+        unsigned Reg = MO.getReg();
+        if (R600::R600_Reg128RegClass.contains(Reg))
+          DstMI = Reg;
+        else
+          DstMI = TRI->getMatchingSuperReg(Reg,
+              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
+              &R600::R600_Reg128RegClass);
+      }
+      if (MO.isUse()) {
+        unsigned Reg = MO.getReg();
+        if (R600::R600_Reg128RegClass.contains(Reg))
+          SrcMI = Reg;
+        else
+          SrcMI = TRI->getMatchingSuperReg(Reg,
+              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
+              &R600::R600_Reg128RegClass);
+      }
+    }
+    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
+      DstRegs.insert(DstMI);
+      return true;
+    } else
+      return false;
+  }
+
+  ClauseFile
+  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
+      const {
+    MachineBasicBlock::iterator ClauseHead = I;
+    std::vector<MachineInstr *> ClauseContent;
+    unsigned AluInstCount = 0;
+    bool IsTex = TII->usesTextureCache(*ClauseHead);
+    std::set<unsigned> DstRegs;
+    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
+      if (IsTrivialInst(*I))
+        continue;
+      if (AluInstCount >= MaxFetchInst)
+        break;
+      if ((IsTex && !TII->usesTextureCache(*I)) ||
+          (!IsTex && !TII->usesVertexCache(*I)))
+        break;
+      if (!isCompatibleWithClause(*I, DstRegs))
+        break;
+      AluInstCount++;
+      ClauseContent.push_back(&*I);
+    }
+    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
+        getHWInstrDesc(IsTex ? CF_TC : CF_VC))
+        .addImm(0) // ADDR
+        .addImm(AluInstCount - 1); // COUNT
+    return ClauseFile(MIb, std::move(ClauseContent));
+  }
+
+  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
+    static const unsigned LiteralRegs[] = {
+      R600::ALU_LITERAL_X,
+      R600::ALU_LITERAL_Y,
+      R600::ALU_LITERAL_Z,
+      R600::ALU_LITERAL_W
+    };
+    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
+        TII->getSrcs(MI);
+    for (const auto &Src : Srcs) {
+      if (Src.first->getReg() != R600::ALU_LITERAL_X)
+        continue;
+      int64_t Imm = Src.second;
+      std::vector<MachineOperand *>::iterator It =
+          llvm::find_if(Lits, [&](MachineOperand *val) {
+            return val->isImm() && (val->getImm() == Imm);
+          });
+
+      // Get corresponding Operand
+      MachineOperand &Operand = MI.getOperand(
+          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
+
+      if (It != Lits.end()) {
+        // Reuse existing literal reg
+        unsigned Index = It - Lits.begin();
+        Src.first->setReg(LiteralRegs[Index]);
+      } else {
+        // Allocate new literal reg
+        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
+        Src.first->setReg(LiteralRegs[Lits.size()]);
+        Lits.push_back(&Operand);
+      }
+    }
+  }
+
+  MachineBasicBlock::iterator insertLiterals(
+      MachineBasicBlock::iterator InsertPos,
+      const std::vector<unsigned> &Literals) const {
+    MachineBasicBlock *MBB = InsertPos->getParent();
+    for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
+      unsigned LiteralPair0 = Literals[i];
+      unsigned LiteralPair1 = (i + 1 < e) ? Literals[i + 1] : 0;
+      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
+          TII->get(R600::LITERALS))
+          .addImm(LiteralPair0)
+          .addImm(LiteralPair1);
+    }
+    return InsertPos;
+  }
+
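+  // Collect a run of ALU instructions into one clause: instruction groups
+  // are unbundled, immediate operands are assigned to ALU_LITERAL_X..W and
+  // materialized as trailing LITERALS instructions, and the clause header's
+  // instruction-count operand is patched once the clause is complete.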
+  ClauseFile
+  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
+      const {
+    MachineInstr &ClauseHead = *I;
+    std::vector<MachineInstr *> ClauseContent;
+    I++;
+    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
+      if (IsTrivialInst(*I)) {
+        ++I;
+        continue;
+      }
+      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
+        break;
+      std::vector<MachineOperand *> Literals;
+      if (I->isBundle()) {
+        MachineInstr &DeleteMI = *I;
+        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
+        while (++BI != E && BI->isBundledWithPred()) {
+          BI->unbundleFromPred();
+          for (MachineOperand &MO : BI->operands()) {
+            if (MO.isReg() && MO.isInternalRead())
+              MO.setIsInternalRead(false);
+          }
+          getLiteral(*BI, Literals);
+          ClauseContent.push_back(&*BI);
+        }
+        I = BI;
+        DeleteMI.eraseFromParent();
+      } else {
+        getLiteral(*I, Literals);
+        ClauseContent.push_back(&*I);
+        I++;
+      }
+      for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
+        MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
+            TII->get(R600::LITERALS));
+        if (Literals[i]->isImm()) {
+          MILit.addImm(Literals[i]->getImm());
+        } else {
+          MILit.addGlobalAddress(Literals[i]->getGlobal(),
+                                 Literals[i]->getOffset());
+        }
+        if (i + 1 < e) {
+          if (Literals[i + 1]->isImm()) {
+            MILit.addImm(Literals[i + 1]->getImm());
+          } else {
+            MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
+                                   Literals[i + 1]->getOffset());
+          }
+        } else
+          MILit.addImm(0);
+        ClauseContent.push_back(MILit);
+      }
+    }
+    assert(ClauseContent.size() < 128 && "ALU clause is too big");
+    ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
+    return ClauseFile(&ClauseHead, std::move(ClauseContent));
+  }
+
+  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
+                       const DebugLoc &DL, ClauseFile &Clause,
+                       unsigned &CfCount) {
+    CounterPropagateAddr(*Clause.first, CfCount);
+    MachineBasicBlock *BB = Clause.first->getParent();
+    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
+    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
+      BB->splice(InsertPos, BB, Clause.second[i]);
+    }
+    CfCount += 2 * Clause.second.size();
+  }
+
+  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
+                     ClauseFile &Clause, unsigned &CfCount) {
+    Clause.first->getOperand(0).setImm(0);
+    CounterPropagateAddr(*Clause.first, CfCount);
+    MachineBasicBlock *BB = Clause.first->getParent();
+    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
+    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
+      BB->splice(InsertPos, BB, Clause.second[i]);
+    }
+    CfCount += Clause.second.size();
+  }
+
+  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
+    MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
+  }
+
+  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
+                            unsigned Addr) const {
+    for (MachineInstr *MI : MIs) {
+      CounterPropagateAddr(*MI, Addr);
+    }
+  }
+
+public:
+  static char ID;
+
+  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
+
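+  // Walk each block in order, keeping CfCount as the address of the next
+  // native CF instruction to be emitted, so that branch targets can be
+  // back-patched into the JUMP/ELSE/LOOP instructions recorded on
+  // IfThenElseStack and LoopStack.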
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    ST = &MF.getSubtarget<R600Subtarget>();
+    MaxFetchInst = ST->getTexVTXClauseSize();
+    TII = ST->getInstrInfo();
+    TRI = ST->getRegisterInfo();
+
+    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
+
+    CFStack CFStack(ST, MF.getFunction().getCallingConv());
+    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
+         ++MB) {
+      MachineBasicBlock &MBB = *MB;
+      unsigned CfCount = 0;
+      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
+      std::vector<MachineInstr *> IfThenElseStack;
+      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
+        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
+                getHWInstrDesc(CF_CALL_FS));
+        CfCount++;
+      }
+      std::vector<ClauseFile> FetchClauses, AluClauses;
+      std::vector<MachineInstr *> LastAlu(1);
+      std::vector<MachineInstr *> ToPopAfter;
+
+      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+           I != E;) {
+        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
+          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
+          FetchClauses.push_back(MakeFetchClause(MBB, I));
+          CfCount++;
+          LastAlu.back() = nullptr;
+          continue;
+        }
+
+        MachineBasicBlock::iterator MI = I;
+        if (MI->getOpcode() != R600::ENDIF)
+          LastAlu.back() = nullptr;
+        if (MI->getOpcode() == R600::CF_ALU)
+          LastAlu.back() = &*MI;
+        I++;
+        bool RequiresWorkAround =
+            CFStack.requiresWorkAroundForInst(MI->getOpcode());
+        switch (MI->getOpcode()) {
+        case R600::CF_ALU_PUSH_BEFORE:
+          if (RequiresWorkAround) {
+            LLVM_DEBUG(dbgs()
+                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
+            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
+                .addImm(CfCount + 1)
+                .addImm(1);
+            MI->setDesc(TII->get(R600::CF_ALU));
+            CfCount++;
+            CFStack.pushBranch(R600::CF_PUSH_EG);
+          } else
+            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
+          LLVM_FALLTHROUGH;
+        case R600::CF_ALU:
+          I = MI;
+          AluClauses.push_back(MakeALUClause(MBB, I));
+          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+          CfCount++;
+          break;
+        case R600::WHILELOOP: {
+          CFStack.pushLoop();
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_WHILE_LOOP))
+              .addImm(1);
+          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
+              std::set<MachineInstr *>());
+          Pair.second.insert(MIb);
+          LoopStack.push_back(std::move(Pair));
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        case R600::ENDLOOP: {
+          CFStack.popLoop();
+          std::pair<unsigned, std::set<MachineInstr *>> Pair =
+              std::move(LoopStack.back());
+          LoopStack.pop_back();
+          CounterPropagateAddr(Pair.second, CfCount);
+          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
+              .addImm(Pair.first + 1);
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        case R600::IF_PREDICATE_SET: {
+          LastAlu.push_back(nullptr);
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_JUMP))
+              .addImm(0)
+              .addImm(0);
+          IfThenElseStack.push_back(MIb);
+          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        case R600::ELSE: {
+          MachineInstr *JumpInst = IfThenElseStack.back();
+          IfThenElseStack.pop_back();
+          CounterPropagateAddr(*JumpInst, CfCount);
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_ELSE))
+              .addImm(0)
+              .addImm(0);
+          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+          IfThenElseStack.push_back(MIb);
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        case R600::ENDIF: {
+          CFStack.popBranch();
+          if (LastAlu.back()) {
+            ToPopAfter.push_back(LastAlu.back());
+          } else {
+            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+                getHWInstrDesc(CF_POP))
+                .addImm(CfCount + 1)
+                .addImm(1);
+            (void)MIb;
+            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
+            CfCount++;
+          }
+
+          MachineInstr *IfOrElseInst = IfThenElseStack.back();
+          IfThenElseStack.pop_back();
+          CounterPropagateAddr(*IfOrElseInst, CfCount);
+          IfOrElseInst->getOperand(1).setImm(1);
+          LastAlu.pop_back();
+          MI->eraseFromParent();
+          break;
+        }
+        case R600::BREAK: {
+          CfCount++;
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_LOOP_BREAK))
+              .addImm(0);
+          LoopStack.back().second.insert(MIb);
+          MI->eraseFromParent();
+          break;
+        }
+        case R600::CONTINUE: {
+          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
+              getHWInstrDesc(CF_LOOP_CONTINUE))
+              .addImm(0);
+          LoopStack.back().second.insert(MIb);
+          MI->eraseFromParent();
+          CfCount++;
+          break;
+        }
+        case R600::RETURN: {
+          DebugLoc DL = MBB.findDebugLoc(MI);
+          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
+          CfCount++;
+          if (CfCount % 2) {
+            BuildMI(MBB, I, DL, TII->get(R600::PAD));
+            CfCount++;
+          }
+          MI->eraseFromParent();
+          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
+            EmitFetchClause(I, DL, FetchClauses[i], CfCount);
+          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
+            EmitALUClause(I, DL, AluClauses[i], CfCount);
+          break;
+        }
+        default:
+          if (TII->isExport(MI->getOpcode())) {
+            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
+            CfCount++;
+          }
+          break;
+        }
+      }
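+      // Where an ENDIF immediately followed a CF_ALU clause, fold the stack
+      // pop into the clause by rewriting it as CF_ALU_POP_AFTER instead of
+      // emitting a separate POP instruction.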
+      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
+        MachineInstr *Alu = ToPopAfter[i];
+        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
+                TII->get(R600::CF_ALU_POP_AFTER))
+            .addImm(Alu->getOperand(0).getImm())
+            .addImm(Alu->getOperand(1).getImm())
+            .addImm(Alu->getOperand(2).getImm())
+            .addImm(Alu->getOperand(3).getImm())
+            .addImm(Alu->getOperand(4).getImm())
+            .addImm(Alu->getOperand(5).getImm())
+            .addImm(Alu->getOperand(6).getImm())
+            .addImm(Alu->getOperand(7).getImm())
+            .addImm(Alu->getOperand(8).getImm());
+        Alu->eraseFromParent();
+      }
+      MFI->CFStackSize = CFStack.MaxStackSize;
+    }
+
+    return false;
+  }
+
+  StringRef getPassName() const override {
+    return "R600 Control Flow Finalizer Pass";
+  }
+};
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
+                      "R600 Control Flow Finalizer", false, false)
+INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
+                    "R600 Control Flow Finalizer", false, false)
+
+char R600ControlFlowFinalizer::ID = 0;
+
+char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
+
+FunctionPass *llvm::createR600ControlFlowFinalizer() {
+  return new R600ControlFlowFinalizer();
+}
--
cgit v1.2.3