aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/R600/SIInsertWaits.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/R600/SIInsertWaits.cpp')
-rw-r--r--lib/Target/R600/SIInsertWaits.cpp358
1 files changed, 358 insertions, 0 deletions
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
new file mode 100644
index 000000000000..98bd3dbb6646
--- /dev/null
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -0,0 +1,358 @@
+//===-- SIInsertWaits.cpp - Insert Wait Instructions ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief Insert wait instructions for memory reads and writes.
+///
+/// Memory reads and writes are issued asynchronously, so we need to insert
+/// S_WAITCNT instructions when we want to access any of their results or
+/// overwrite any register that's used asynchronously.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief One variable for each of the hardware counters.
+///
+/// The same three values can be reached by name (Named.VM, Named.EXP,
+/// Named.LGKM) or by index (Array[0..2]) when looping over all counters.
+union Counters {
+  struct {
+    unsigned VM;
+    unsigned EXP;
+    unsigned LGKM;
+  } Named;
+  unsigned Array[3];
+};
+
+/// \brief One set of counter values per tracked register slot.
+typedef Counters RegCounters[512];
+
+/// \brief Half-open range [first, second) of register slots.
+typedef std::pair<unsigned, unsigned> RegInterval;
+
+/// \brief Machine function pass that inserts S_WAITCNT instructions where an
+/// instruction depends on registers still in use by asynchronous operations.
+class SIInsertWaits : public MachineFunctionPass {
+
+private:
+  static char ID;
+  const SIInstrInfo *TII;
+  const SIRegisterInfo &TRI;
+  const MachineRegisterInfo *MRI;
+
+  /// \brief Constant hardware limits
+  static const Counters WaitCounts;
+
+  /// \brief Constant zero value
+  static const Counters ZeroCounts;
+
+  /// \brief Counter values we have already waited on.
+  Counters WaitedOn;
+
+  /// \brief Counter values for last instruction issued.
+  Counters LastIssued;
+
+  /// \brief Registers used by async instructions.
+  RegCounters UsedRegs;
+
+  /// \brief Registers defined by async instructions.
+  RegCounters DefinedRegs;
+
+  /// \brief Different export instruction types seen since last wait.
+  ///
+  /// Bit 0 is set for AMDGPU::EXP, bit 1 for any other instruction that
+  /// increments EXP_CNT.
+  unsigned ExpInstrTypesSeen;
+
+  /// \brief Get increment/decrement amount for this instruction.
+  Counters getHwCounts(MachineInstr &MI);
+
+  /// \brief Is operand relevant for async execution?
+  bool isOpRelevant(MachineOperand &Op);
+
+  /// \brief Get register interval an operand affects.
+  RegInterval getRegInterval(MachineOperand &Op);
+
+  /// \brief Handle an instruction's async components.
+  void pushInstruction(MachineInstr &MI);
+
+  /// \brief Insert the actual wait instruction.
+  bool insertWait(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator I,
+                  const Counters &Counts);
+
+  /// \brief Do we need def2def checks?
+  ///
+  /// NOTE(review): declared here, but no definition is visible in this file.
+  bool unorderedDefines(MachineInstr &MI);
+
+  /// \brief Resolve all operand dependencies to counter requirements.
+  Counters handleOperands(MachineInstr &MI);
+
+public:
+  /// \brief Construct the pass for the given target machine.
+  ///
+  /// MRI and ExpInstrTypesSeen are explicitly initialized: the latter is read
+  /// and OR-ed into while instructions are processed, so it must never start
+  /// out with an indeterminate value.
+  SIInsertWaits(TargetMachine &tm) :
+    MachineFunctionPass(ID),
+    TII(static_cast<const SIInstrInfo*>(tm.getInstrInfo())),
+    TRI(TII->getRegisterInfo()),
+    MRI(0),
+    ExpInstrTypesSeen(0) { }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  const char *getPassName() const {
+    return "SI insert wait instructions";
+  }
+
+};
+
+} // End anonymous namespace
+
+char SIInsertWaits::ID = 0; // Handed to MachineFunctionPass by the constructor.
+
+const Counters SIInsertWaits::WaitCounts = { { 15, 7, 7 } }; // VM, EXP, LGKM
+const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } }; // VM, EXP, LGKM
+
+/// \brief Create a new instance of the SI wait-insertion pass.
+///
+/// The pass is allocated with new and returned as a raw pointer.
+FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) {
+  SIInsertWaits *Pass = new SIInsertWaits(tm);
+  return Pass;
+}
+
+/// \brief Compute by how much \p MI increments each hardware counter.
+///
+/// VM and EXP are incremented by at most one; LGKM is incremented by one or
+/// two depending on the size of the register class of operand 0.
+Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
+
+  const uint64_t Flags = TII->get(MI.getOpcode()).TSFlags;
+  Counters Increments;
+
+  Increments.Named.VM = (Flags & SIInstrFlags::VM_CNT) ? 1 : 0;
+
+  // EXP_CNT is only counted for EXP itself or for instructions that may store
+  const bool CountsExp =
+    (Flags & SIInstrFlags::EXP_CNT) &&
+    (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore());
+  Increments.Named.EXP = CountsExp ? 1 : 0;
+
+  // Nothing more to do unless the instruction touches LGKM_CNT
+  if (!(Flags & SIInstrFlags::LGKM_CNT)) {
+    Increments.Named.LGKM = 0;
+    return Increments;
+  }
+
+  // LGKM may use larger values: registers with a size above 4 count twice
+  MachineOperand &Op = MI.getOperand(0);
+  assert(Op.isReg() && "First LGKM operand must be a register!");
+
+  const unsigned RegSize = TRI.getMinimalPhysRegClass(Op.getReg())->getSize();
+  Increments.Named.LGKM = RegSize > 4 ? 2 : 1;
+
+  return Increments;
+}
+
+/// \brief Decide whether \p Op has to be tracked for async execution.
+///
+/// Register definitions are always tracked. Register uses are tracked for
+/// every operand of an EXP, and for instructions that may store only when the
+/// operand is identical to the instruction's first register use.
+bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
+
+  // Non-register operands never matter
+  if (!Op.isReg())
+    return false;
+
+  // Any definition matters
+  if (Op.isDef())
+    return true;
+
+  MachineInstr &Parent = *Op.getParent();
+
+  // Exports read all of their registers asynchronously
+  if (Parent.getOpcode() == AMDGPU::EXP)
+    return true;
+
+  // Everything else is only interesting if it may store
+  if (!Parent.getDesc().mayStore())
+    return false;
+
+  // The first register use decides: the operand is relevant only if it is
+  // identical to that one.
+  for (unsigned Idx = 0, End = Parent.getNumOperands(); Idx != End; ++Idx) {
+
+    MachineOperand &Candidate = Parent.getOperand(Idx);
+    if (!Candidate.isReg() || !Candidate.isUse())
+      continue;
+
+    return Op.isIdenticalTo(Candidate);
+  }
+
+  return false;
+}
+
+/// \brief Map \p Op to the half-open range of register slots it covers.
+///
+/// Non-register operands yield the empty range [0, 0). For registers the
+/// range starts at the encoding value and spans one slot per 4 units of the
+/// register class size.
+RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
+
+  if (!Op.isReg())
+    return RegInterval(0, 0);
+
+  const unsigned Reg = Op.getReg();
+  const unsigned RegSize = TRI.getMinimalPhysRegClass(Reg)->getSize();
+
+  assert(RegSize >= 4);
+
+  const unsigned FirstSlot = TRI.getEncodingValue(Reg);
+  const unsigned NumSlots = RegSize / 4;
+
+  return RegInterval(FirstSlot, FirstSlot + NumSlots);
+}
+
+/// \brief Record the asynchronous effects of \p MI.
+///
+/// Advances LastIssued by the instruction's counter increments and, if any
+/// counter changed, stamps every relevant register slot with the new
+/// LastIssued value (DefinedRegs for defs, UsedRegs for uses).
+void SIInsertWaits::pushInstruction(MachineInstr &MI) {
+
+  // Apply the hardware counter increments and note whether any is non-zero
+  const Counters Delta = getHwCounts(MI);
+  bool AnyIncrement = false;
+
+  for (unsigned Cnt = 0; Cnt != 3; ++Cnt) {
+    LastIssued.Array[Cnt] += Delta.Array[Cnt];
+    if (Delta.Array[Cnt] != 0)
+      AnyIncrement = true;
+  }
+
+  // Instructions that touch no counter need no bookkeeping
+  if (!AnyIncrement)
+    return;
+
+  // Track the kind of EXP_CNT instruction: bit 0 for EXP, bit 1 otherwise
+  if (Delta.Named.EXP) {
+    if (MI.getOpcode() == AMDGPU::EXP)
+      ExpInstrTypesSeen |= 1;
+    else
+      ExpInstrTypesSeen |= 2;
+  }
+
+  const unsigned NumOps = MI.getNumOperands();
+  for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
+
+    MachineOperand &Op = MI.getOperand(OpIdx);
+    if (!isOpRelevant(Op))
+      continue;
+
+    // Relevant operands are always registers, so def/use can be queried once
+    const bool IsDef = Op.isDef();
+    const bool IsUse = Op.isUse();
+
+    const RegInterval Range = getRegInterval(Op);
+    for (unsigned Slot = Range.first; Slot < Range.second; ++Slot) {
+
+      // Remember which registers are defined ...
+      if (IsDef)
+        DefinedRegs[Slot] = LastIssued;
+
+      // ... and which ones are read
+      if (IsUse)
+        UsedRegs[Slot] = LastIssued;
+    }
+  }
+}
+
+/// \brief Insert an S_WAITCNT before \p I if \p Required is not yet satisfied.
+///
+/// \param MBB      Block that receives the wait instruction.
+/// \param I        Insertion point; may be MBB.end().
+/// \param Required Counter values that must have completed before \p I.
+/// \returns true if a wait instruction was inserted.
+///
+/// For counters whose operations complete in order, the wait allows as many
+/// operations to remain outstanding as were issued after the required one
+/// (clamped to the hardware limits in WaitCounts). Counters that may complete
+/// out of order are waited on until they reach zero.
+bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator I,
+                               const Counters &Required) {
+
+  // End of program? No need to wait on anything
+  if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+    return false;
+
+  // Figure out if the async instructions execute in order
+  bool Ordered[3];
+
+  // VM_CNT is always ordered
+  Ordered[0] = true;
+
+  // EXP_CNT is unordered if we have both EXP & VM-writes, i.e. it is only
+  // ordered while at most one of the two instruction types is outstanding.
+  Ordered[1] = ExpInstrTypesSeen != 3;
+
+  // LGKM_CNT is handled as always unordered. TODO: Handle LDS and GDS
+  Ordered[2] = false;
+
+  // The values we are going to put into the S_WAITCNT instruction; starting
+  // from the hardware maximum means "don't wait" for untouched counters.
+  Counters Counts = WaitCounts;
+
+  // Do we really need to wait?
+  bool NeedWait = false;
+
+  for (unsigned i = 0; i < 3; ++i) {
+
+    // Already waited far enough for this counter
+    if (Required.Array[i] <= WaitedOn.Array[i])
+      continue;
+
+    NeedWait = true;
+
+    if (Ordered[i]) {
+      // Number of operations issued after the one we depend on
+      unsigned Value = LastIssued.Array[i] - Required.Array[i];
+
+      // Adjust the value to the real hardware possibilities
+      Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
+
+    } else
+      Counts.Array[i] = 0;
+
+    // Remember what we have waited on
+    WaitedOn.Array[i] = LastIssued.Array[i] - Counts.Array[i];
+  }
+
+  if (!NeedWait)
+    return false;
+
+  // Reset EXP_CNT instruction types: nothing is outstanding after this wait
+  if (Counts.Named.EXP == 0)
+    ExpInstrTypesSeen = 0;
+
+  // Build the wait instruction; the immediate packs VM into bits 0-3,
+  // EXP into bits 4-6 and LGKM into bits 8-10.
+  BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+          .addImm((Counts.Named.VM & 0xF) |
+                  ((Counts.Named.EXP & 0x7) << 4) |
+                  ((Counts.Named.LGKM & 0x7) << 8));
+
+  return true;
+}
+
+/// \brief Helper for handleOperands: raise each counter in \p Dst to at least
+/// the corresponding value in \p Src.
+static void increaseCounters(Counters &Dst, const Counters &Src) {
+
+  for (unsigned Cnt = 0; Cnt != 3; ++Cnt) {
+    if (Dst.Array[Cnt] < Src.Array[Cnt])
+      Dst.Array[Cnt] = Src.Array[Cnt];
+  }
+}
+
+/// \brief Compute the counter values \p MI has to wait for.
+///
+/// The result is the per-counter maximum over all register slots the
+/// instruction touches: a use depends on earlier async definitions of the
+/// slot, a def depends on earlier async definitions and uses.
+Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
+
+  Counters Needed = ZeroCounts;
+
+  for (MachineInstr::mop_iterator OpI = MI.operands_begin(),
+       OpE = MI.operands_end(); OpI != OpE; ++OpI) {
+
+    MachineOperand &Op = *OpI;
+
+    // Non-register operands yield an empty interval, so the def/use queries
+    // below are only ever made on register operands.
+    const RegInterval Range = getRegInterval(Op);
+    for (unsigned Slot = Range.first; Slot < Range.second; ++Slot) {
+
+      // Both defs and uses must wait for pending definitions of the slot
+      increaseCounters(Needed, DefinedRegs[Slot]);
+
+      // Only defs additionally wait for pending reads of the slot
+      if (Op.isDef())
+        increaseCounters(Needed, UsedRegs[Slot]);
+    }
+  }
+
+  return Needed;
+}
+
+/// \brief Insert the required S_WAITCNT instructions into \p MF.
+///
+/// Walks every instruction of every basic block, inserting a wait in front of
+/// instructions whose operands are still in flight, and waits for everything
+/// outstanding before the first terminator of each block.
+///
+/// \returns true if at least one wait instruction was inserted.
+bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
+
+  bool Changes = false;
+
+  MRI = &MF.getRegInfo();
+
+  // Start every function from a clean state. ExpInstrTypesSeen must be reset
+  // as well: insertWait() returns early in front of S_ENDPGM without clearing
+  // it, so it could otherwise carry over from a previously processed function.
+  WaitedOn = ZeroCounts;
+  LastIssued = ZeroCounts;
+  ExpInstrTypesSeen = 0;
+
+  memset(&UsedRegs, 0, sizeof(UsedRegs));
+  memset(&DefinedRegs, 0, sizeof(DefinedRegs));
+
+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+       BI != BE; ++BI) {
+
+    MachineBasicBlock &MBB = *BI;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+
+      // Wait for whatever this instruction depends on, then account for the
+      // async operations it issues itself.
+      Changes |= insertWait(MBB, I, handleOperands(*I));
+      pushInstruction(*I);
+    }
+
+    // Wait for everything at the end of the MBB
+    Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+  }
+
+  return Changes;
+}