diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-07-26 19:36:28 +0000 |
commit | cfca06d7963fa0909f90483b42a6d7d194d01e08 (patch) | |
tree | 209fb2a2d68f8f277793fc8df46c753d31bc853b /llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp | |
parent | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (diff) |
Notes
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp | 203 |
1 files changed, 203 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp new file mode 100644 index 0000000000000..35c49ae8c0dd1 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp @@ -0,0 +1,203 @@ +//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Insert s_clause instructions to form hard clauses. +/// +/// Clausing load instructions can give cache coherency benefits. Before gfx10, +/// the hardware automatically detected "soft clauses", which were sequences of +/// memory instructions of the same type. In gfx10 this detection was removed, +/// and the s_clause instruction was introduced to explicitly mark "hard +/// clauses". +/// +/// It's the scheduler's job to form the clauses by putting similar memory +/// instructions next to each other. Our job is just to insert an s_clause +/// instruction to mark the start of each clause. +/// +/// Note that hard clauses are very similar to, but logically distinct from, the +/// groups of instructions that have to be restartable when XNACK is enabled. +/// The rules are slightly different in each case. For example an s_nop +/// instruction breaks a restartable group, but can appear in the middle of a +/// hard clause. (Before gfx10 there wasn't a distinction, and both were called +/// "soft clauses" or just "clauses".) +/// +/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable +/// groups, not hard clauses. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUSubtarget.h" +#include "SIInstrInfo.h" +#include "llvm/ADT/SmallVector.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-insert-hard-clauses" + +namespace { + +enum HardClauseType { + // Texture, buffer, global or scratch memory instructions. + HARDCLAUSE_VMEM, + // Flat (not global or scratch) memory instructions. + HARDCLAUSE_FLAT, + // Instructions that access LDS. + HARDCLAUSE_LDS, + // Scalar memory instructions. + HARDCLAUSE_SMEM, + // VALU instructions. + HARDCLAUSE_VALU, + LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, + + // Internal instructions, which are allowed in the middle of a hard clause, + // except for s_waitcnt. + HARDCLAUSE_INTERNAL, + // Instructions that are not allowed in a hard clause: SALU, export, branch, + // message, GDS, s_waitcnt and anything else not mentioned above. + HARDCLAUSE_ILLEGAL, +}; + +HardClauseType getHardClauseType(const MachineInstr &MI) { + // On current architectures we only get a benefit from clausing loads. + if (MI.mayLoad()) { + if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) + return HARDCLAUSE_VMEM; + if (SIInstrInfo::isFLAT(MI)) + return HARDCLAUSE_FLAT; + // TODO: LDS + if (SIInstrInfo::isSMRD(MI)) + return HARDCLAUSE_SMEM; + } + + // Don't form VALU clauses. It's not clear what benefit they give, if any. + + // In practice s_nop is the only internal instruction we're likely to see. + // It's safe to treat the rest as illegal. + if (MI.getOpcode() == AMDGPU::S_NOP) + return HARDCLAUSE_INTERNAL; + return HARDCLAUSE_ILLEGAL; +} + +class SIInsertHardClauses : public MachineFunctionPass { +public: + static char ID; + + SIInsertHardClauses() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + // Track information about a clause as we discover it. + struct ClauseInfo { + // The type of all (non-internal) instructions in the clause. + HardClauseType Type = HARDCLAUSE_ILLEGAL; + // The first (necessarily non-internal) instruction in the clause. + MachineInstr *First = nullptr; + // The last non-internal instruction in the clause. + MachineInstr *Last = nullptr; + // The length of the clause including any internal instructions in the + // middle or after the end of the clause. + unsigned Length = 0; + // The base operands of *Last. + SmallVector<const MachineOperand *, 4> BaseOps; + }; + + bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { + // Get the size of the clause excluding any internal instructions at the + // end. + unsigned Size = + std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1; + if (Size < 2) + return false; + assert(Size <= 64 && "Hard clause is too long!"); + + auto &MBB = *CI.First->getParent(); + auto ClauseMI = + BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) + .addImm(Size - 1); + finalizeBundle(MBB, ClauseMI->getIterator(), + std::next(CI.Last->getIterator())); + return true; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction())) + return false; + + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + if (!ST.hasHardClauses()) + return false; + + const SIInstrInfo *SII = ST.getInstrInfo(); + const TargetRegisterInfo *TRI = ST.getRegisterInfo(); + + bool Changed = false; + for (auto &MBB : MF) { + ClauseInfo CI; + for (auto &MI : MBB) { + HardClauseType Type = getHardClauseType(MI); + + int64_t Dummy1; + bool Dummy2; + unsigned Dummy3; + SmallVector<const MachineOperand *, 4> BaseOps; + if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { + if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, + Dummy3, TRI)) { + // We failed to get the base operands, so we'll never clause this + // instruction with any other, so pretend it's illegal. + Type = HARDCLAUSE_ILLEGAL; + } + } + + if (CI.Length == 64 || + (CI.Length && Type != HARDCLAUSE_INTERNAL && + (Type != CI.Type || + // Note that we lie to shouldClusterMemOps about the size of the + // cluster. When shouldClusterMemOps is called from the machine + // scheduler it limits the size of the cluster to avoid increasing + // register pressure too much, but this pass runs after register + // allocation so there is no need for that kind of limit. + !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) { + // Finish the current clause. + Changed |= emitClause(CI, SII); + CI = ClauseInfo(); + } + + if (CI.Length) { + // Extend the current clause. + ++CI.Length; + if (Type != HARDCLAUSE_INTERNAL) { + CI.Last = &MI; + CI.BaseOps = std::move(BaseOps); + } + } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { + // Start a new clause. + CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)}; + } + } + + // Finish the last clause in the basic block if any. + if (CI.Length) + Changed |= emitClause(CI, SII); + } + + return Changed; + } +}; + +} // namespace + +char SIInsertHardClauses::ID = 0; + +char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; + +INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", + false, false) |