Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp | 142 |
1 file changed, 142 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
new file mode 100644
index 000000000000..365c005b2503
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -0,0 +1,142 @@
+//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements a TargetTransformInfo analysis pass specific to the
+// R600 target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600TargetTransformInfo.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Subtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "R600tti"
+
+R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+    : BaseT(TM, F.getParent()->getDataLayout()),
+      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
+      TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
+
+unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
+  return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
+unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
+  return getHardwareNumberOfRegisters(Vec);
+}
+
+TypeSize
+R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+  return TypeSize::getFixed(32);
+}
+
+unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
+
+unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
+  if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
+    return 128;
+  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+      AddrSpace == AMDGPUAS::REGION_ADDRESS)
+    return 64;
+  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
+    return 32;
+
+  if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
+       AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
+       (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+        AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
+    return 128;
+  llvm_unreachable("unhandled address space");
+}
+
+bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+                                             Align Alignment,
+                                             unsigned AddrSpace) const {
+  // We allow vectorization of flat stores, even though we may need to decompose
+  // them later if they may access private memory. We don't have enough context
+  // here, and legalization can handle it.
+  return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
+}
+
+bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+                                              Align Alignment,
+                                              unsigned AddrSpace) const {
+  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+                                               Align Alignment,
+                                               unsigned AddrSpace) const {
+  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+  // Disable unrolling if the loop is not vectorized.
+  // TODO: Enable this again.
+  if (VF == 1)
+    return 1;
+
+  return 8;
+}
+
+InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
+                                            TTI::TargetCostKind CostKind,
+                                            const Instruction *I) {
+  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
+    return Opcode == Instruction::PHI ? 0 : 1;
+
+  // XXX - For some reason this isn't called for switch.
+  switch (Opcode) {
+  case Instruction::Br:
+  case Instruction::Ret:
+    return 10;
+  default:
+    return BaseT::getCFInstrCost(Opcode, CostKind, I);
+  }
+}
+
+InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+                                                unsigned Index) {
+  switch (Opcode) {
+  case Instruction::ExtractElement:
+  case Instruction::InsertElement: {
+    unsigned EltSize =
+        DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
+    if (EltSize < 32) {
+      return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+    }
+
+    // Extracts are just reads of a subregister, so are free. Inserts are
+    // considered free because we don't want to have any cost for scalarizing
+    // operations, and we don't have to copy into a different register class.
+
+    // Dynamic indexing isn't free and is best avoided.
+    return Index == ~0u ? 2 : 0;
+  }
+  default:
+    return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+  }
+}
+
+void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+                                          TTI::UnrollingPreferences &UP,
+                                          OptimizationRemarkEmitter *ORE) {
+  CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
+}
+
+void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+                                        TTI::PeelingPreferences &PP) {
+  CommonTTI.getPeelingPreferences(L, SE, PP);
+}
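
Note: the core of getLoadStoreVecRegBitWidth above is a per-address-space dispatch: wide vector accesses for global/constant memory, narrower ones for local and private. The following is a minimal standalone sketch of that shape, not LLVM code. The enum values are placeholders invented for illustration (the real numbers live in the AMDGPUAS namespace), and the kernel-argument and constant-buffer spaces, which also get 128 bits, are omitted for brevity.

#include <cstdio>
#include <cstdlib>

// Hypothetical stand-ins for the AMDGPUAS address-space numbers.
enum AddrSpace { Global, Constant, Local, Region, Private };

// Mirrors the shape of R600TTIImpl::getLoadStoreVecRegBitWidth: pick the
// widest vector access width the target supports for a given memory region.
unsigned loadStoreVecRegBitWidth(AddrSpace AS) {
  if (AS == Global || AS == Constant)
    return 128; // 128-bit accesses for off-chip and constant memory.
  if (AS == Local || AS == Region)
    return 64;  // LDS/GDS-style accesses are capped at 64 bits.
  if (AS == Private)
    return 32;  // Private (scratch) memory is accessed a dword at a time.
  // The real code ends with llvm_unreachable(); abort() plays that role here.
  abort();
}

int main() {
  printf("global: %u bits\n", loadStoreVecRegBitWidth(Global));
  printf("local:  %u bits\n", loadStoreVecRegBitWidth(Local));
}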

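One convention worth noting in getVectorInstrCost: an Index of ~0u is LLVM's sentinel for "element index not known at compile time". The hook charges 2 for that dynamic case, while constant-index extracts and inserts of 32-bit-or-wider elements are free, since they are just subregister reads or writes. A minimal standalone sketch of that decision, again using no real LLVM types:

#include <cstdio>

// Sketch of the constant-vs-dynamic index distinction used by
// R600TTIImpl::getVectorInstrCost above.
unsigned vectorInstrCost(unsigned EltSizeInBits, unsigned Index) {
  if (EltSizeInBits < 32)
    return 1; // Stand-in for deferring to the base TTI implementation.
  // Constant-index extract/insert is a subregister access: free.
  // Dynamic indexing needs indirect addressing: charge a small cost.
  return Index == ~0u ? 2 : 0;
}

int main() {
  printf("extract elt 3 of <4 x i32>: cost %u\n", vectorInstrCost(32, 3));
  printf("dynamic-index extract:      cost %u\n", vectorInstrCost(32, ~0u));
}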