Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp | 142
1 file changed, 142 insertions(+), 0 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
new file mode 100644
index 000000000000..365c005b2503
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -0,0 +1,142 @@
+//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements a TargetTransformInfo analysis pass specific to the
+// R600 target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600TargetTransformInfo.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Subtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "R600tti"
+
+R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
+ TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
+
+unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
+ return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
+unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
+ return getHardwareNumberOfRegisters(Vec);
+}
+
+TypeSize
+R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+ return TypeSize::getFixed(32);
+}
+
+unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
+
+unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
+ if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
+ return 128;
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)
+ return 64;
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
+ return 32;
+
+ if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
+ AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
+ (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+ AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
+ return 128;
+ llvm_unreachable("unhandled address space");
+}
+
+bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+ Align Alignment,
+ unsigned AddrSpace) const {
+ // We allow vectorization of flat stores, even though we may need to decompose
+ // them later if they may access private memory. We don't have enough context
+ // here, and legalization can handle it.
+ return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
+}
+
+bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ Align Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ Align Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+ // Disable unrolling if the loop is not vectorized.
+ // TODO: Enable this again.
+ if (VF == 1)
+ return 1;
+
+ return 8;
+}
+
+InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
+ return Opcode == Instruction::PHI ? 0 : 1;
+
+ // XXX - For some reason this isn't called for switch.
+ switch (Opcode) {
+ case Instruction::Br:
+ case Instruction::Ret:
+ return 10;
+ default:
+ return BaseT::getCFInstrCost(Opcode, CostKind, I);
+ }
+}
+
+InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
+ switch (Opcode) {
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement: {
+ unsigned EltSize =
+ DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
+ if (EltSize < 32) {
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+
+ // Extracts are just reads of a subregister, so are free. Inserts are
+ // considered free because we don't want to have any cost for scalarizing
+ // operations, and we don't have to copy into a different register class.
+
+ // Dynamic indexing isn't free and is best avoided.
+ return Index == ~0u ? 2 : 0;
+ }
+ default:
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+}
+
+void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
+ CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
+}
+
+void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ CommonTTI.getPeelingPreferences(L, SE, PP);
+}