diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp | 180 | 
1 file changed, 180 insertions(+), 0 deletions(-)
| diff --git a/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp b/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp new file mode 100644 index 000000000000..ee011286b8ff --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp @@ -0,0 +1,180 @@ +//===-- SIAddIMGInit.cpp - Add any required IMG inits ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Any MIMG instructions that use tfe or lwe require an initialization of the +/// result register that will be written in the case of a memory access failure +/// The required code is also added to tie this init code to the result of the +/// img instruction +/// +//===----------------------------------------------------------------------===// +// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "SIInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" + +#define DEBUG_TYPE "si-img-init" + +using namespace llvm; + +namespace { + +class SIAddIMGInit : public MachineFunctionPass { +public: +  static char ID; + +public: +  SIAddIMGInit() : MachineFunctionPass(ID) { +    initializeSIAddIMGInitPass(*PassRegistry::getPassRegistry()); +  } + +  bool runOnMachineFunction(MachineFunction &MF) override; + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.setPreservesCFG(); +    MachineFunctionPass::getAnalysisUsage(AU); +  } +}; + +} // End anonymous namespace. 
+ +INITIALIZE_PASS(SIAddIMGInit, DEBUG_TYPE, "SI Add IMG Init", false, false) + +char SIAddIMGInit::ID = 0; + +char &llvm::SIAddIMGInitID = SIAddIMGInit::ID; + +FunctionPass *llvm::createSIAddIMGInitPass() { return new SIAddIMGInit(); } + +bool SIAddIMGInit::runOnMachineFunction(MachineFunction &MF) { +  MachineRegisterInfo &MRI = MF.getRegInfo(); +  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); +  const SIInstrInfo *TII = ST.getInstrInfo(); +  const SIRegisterInfo *RI = ST.getRegisterInfo(); +  bool Changed = false; + +  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; +       ++BI) { +    MachineBasicBlock &MBB = *BI; +    MachineBasicBlock::iterator I, Next; +    for (I = MBB.begin(); I != MBB.end(); I = Next) { +      Next = std::next(I); +      MachineInstr &MI = *I; + +      auto Opcode = MI.getOpcode(); +      if (TII->isMIMG(Opcode) && !MI.mayStore()) { +        MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe); +        MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe); +        MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16); + +        // Check for instructions that don't have tfe or lwe fields +        // There shouldn't be any at this point. +        assert( (TFE && LWE) && "Expected tfe and lwe operands in instruction"); + +        unsigned TFEVal = TFE->getImm(); +        unsigned LWEVal = LWE->getImm(); +        unsigned D16Val = D16 ? D16->getImm() : 0; + +        if (TFEVal || LWEVal) { +          // At least one of TFE or LWE are non-zero +          // We have to insert a suitable initialization of the result value and +          // tie this to the dest of the image instruction. + +          const DebugLoc &DL = MI.getDebugLoc(); + +          int DstIdx = +              AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata); + +          // Calculate which dword we have to initialize to 0. 
+          MachineOperand *MO_Dmask = +              TII->getNamedOperand(MI, AMDGPU::OpName::dmask); + +          // check that dmask operand is found. +          assert(MO_Dmask && "Expected dmask operand in instruction"); + +          unsigned dmask = MO_Dmask->getImm(); +          // Determine the number of active lanes taking into account the +          // Gather4 special case +          unsigned ActiveLanes = +              TII->isGather4(Opcode) ? 4 : countPopulation(dmask); + +          // Subreg indices are counted from 1 +          // When D16 then we want next whole VGPR after write data. +          static_assert(AMDGPU::sub0 == 1 && AMDGPU::sub4 == 5, "Subreg indices different from expected"); + +          bool Packed = !ST.hasUnpackedD16VMem(); + +          unsigned InitIdx = +              D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1; + +          // Abandon attempt if the dst size isn't large enough +          // - this is in fact an error but this is picked up elsewhere and +          // reported correctly. +          uint32_t DstSize = +              RI->getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32; +          if (DstSize < InitIdx) +            continue; + +          // Create a register for the intialization value. +          Register PrevDst = +              MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); +          unsigned NewDst = 0; // Final initialized value will be in here + +          // If PRTStrictNull feature is enabled (the default) then initialize +          // all the result registers to 0, otherwise just the error indication +          // register (VGPRn+1) +          unsigned SizeLeft = ST.usePRTStrictNull() ? InitIdx : 1; +          unsigned CurrIdx = ST.usePRTStrictNull() ? 
1 : InitIdx; + +          if (DstSize == 1) { +            // In this case we can just initialize the result directly +            BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), PrevDst) +                .addImm(0); +            NewDst = PrevDst; +          } else { +            BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst); +            for (; SizeLeft; SizeLeft--, CurrIdx++) { +              NewDst = +                  MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx)); +              // Initialize dword +              Register SubReg = +                  MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); +              BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg) +                  .addImm(0); +              // Insert into the super-reg +              BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) +                  .addReg(PrevDst) +                  .addReg(SubReg) +                  .addImm(CurrIdx); + +              PrevDst = NewDst; +            } +          } + +          // Add as an implicit operand +          MachineInstrBuilder(MF, MI).addReg(NewDst, RegState::Implicit); + +          // Tie the just added implicit operand to the dst +          MI.tieOperands(DstIdx, MI.getNumOperands() - 1); + +          Changed = true; +        } +      } +    } +  } + +  return Changed; +} | 
