Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp | 142 |
1 file changed, 142 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
new file mode 100644
index 000000000000..365c005b2503
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -0,0 +1,142 @@
+//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements a TargetTransformInfo analysis pass specific to the
+// R600 target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600TargetTransformInfo.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Subtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "R600tti"
+
+R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+    : BaseT(TM, F.getParent()->getDataLayout()),
+      ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
+      TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
+
+unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
+  return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
+unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
+  return getHardwareNumberOfRegisters(Vec);
+}
+
+TypeSize
+R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+  return TypeSize::getFixed(32);
+}
+
+unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
+
+unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
+  if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+      AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
+    return 128;
+  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+      AddrSpace == AMDGPUAS::REGION_ADDRESS)
+    return 64;
+  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
+    return 32;
+
+  if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
+       AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
+       (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+        AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
+    return 128;
+  llvm_unreachable("unhandled address space");
+}
+
+bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+                                             Align Alignment,
+                                             unsigned AddrSpace) const {
+  // We allow vectorization of flat stores, even though we may need to decompose
+  // them later if they may access private memory. We don't have enough context
+  // here, and legalization can handle it.
+  return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
+}
+
+bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+                                              Align Alignment,
+                                              unsigned AddrSpace) const {
+  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+                                               Align Alignment,
+                                               unsigned AddrSpace) const {
+  return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+  // Disable unrolling if the loop is not vectorized.
+  // TODO: Enable this again.
+  if (VF == 1)
+    return 1;
+
+  return 8;
+}
+
+InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
+                                            TTI::TargetCostKind CostKind,
+                                            const Instruction *I) {
+  if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
+    return Opcode == Instruction::PHI ? 0 : 1;
+
+  // XXX - For some reason this isn't called for switch.
+  switch (Opcode) {
+  case Instruction::Br:
+  case Instruction::Ret:
+    return 10;
+  default:
+    return BaseT::getCFInstrCost(Opcode, CostKind, I);
+  }
+}
+
+InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+                                                unsigned Index) {
+  switch (Opcode) {
+  case Instruction::ExtractElement:
+  case Instruction::InsertElement: {
+    unsigned EltSize =
+        DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
+    if (EltSize < 32) {
+      return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+    }
+
+    // Extracts are just reads of a subregister, so are free. Inserts are
+    // considered free because we don't want to have any cost for scalarizing
+    // operations, and we don't have to copy into a different register class.
+
+    // Dynamic indexing isn't free and is best avoided.
+    return Index == ~0u ? 2 : 0;
+  }
+  default:
+    return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+  }
+}
+
+void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+                                          TTI::UnrollingPreferences &UP,
+                                          OptimizationRemarkEmitter *ORE) {
+  CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
+}
+
+void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+                                        TTI::PeelingPreferences &PP) {
+  CommonTTI.getPeelingPreferences(L, SE, PP);
+}
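
Note: the core of getLoadStoreVecRegBitWidth above is a per-address-space dispatch: wide vector accesses for global/constant memory, narrower ones for local and private. The following is a minimal standalone sketch of that shape, not LLVM code. The enum values are placeholders invented for illustration (the real numbers live in the AMDGPUAS namespace), and the kernel-argument and constant-buffer spaces, which also get 128 bits, are omitted for brevity.

#include <cstdio>
#include <cstdlib>

// Hypothetical stand-ins for the AMDGPUAS address-space numbers.
enum AddrSpace { Global, Constant, Local, Region, Private };

// Mirrors the shape of R600TTIImpl::getLoadStoreVecRegBitWidth: pick the
// widest vector access width the target supports for a given memory region.
unsigned loadStoreVecRegBitWidth(AddrSpace AS) {
  if (AS == Global || AS == Constant)
    return 128; // 128-bit accesses for off-chip and constant memory.
  if (AS == Local || AS == Region)
    return 64;  // LDS/GDS-style accesses are capped at 64 bits.
  if (AS == Private)
    return 32;  // Private (scratch) memory is accessed a dword at a time.
  // The real code ends with llvm_unreachable(); abort() plays that role here.
  abort();
}

int main() {
  printf("global: %u bits\n", loadStoreVecRegBitWidth(Global));
  printf("local:  %u bits\n", loadStoreVecRegBitWidth(Local));
}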

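One convention worth noting in getVectorInstrCost: an Index of ~0u is LLVM's sentinel for "element index not known at compile time". The hook charges 2 for that dynamic case, while constant-index extracts and inserts of 32-bit-or-wider elements are free, since they are just subregister reads or writes. A minimal standalone sketch of that decision, again using no real LLVM types:

#include <cstdio>

// Sketch of the constant-vs-dynamic index distinction used by
// R600TTIImpl::getVectorInstrCost above.
unsigned vectorInstrCost(unsigned EltSizeInBits, unsigned Index) {
  if (EltSizeInBits < 32)
    return 1; // Stand-in for deferring to the base TTI implementation.
  // Constant-index extract/insert is a subregister access: free.
  // Dynamic indexing needs indirect addressing: charge a small cost.
  return Index == ~0u ? 2 : 0;
}

int main() {
  printf("extract elt 3 of <4 x i32>: cost %u\n", vectorInstrCost(32, 3));
  printf("dynamic-index extract:      cost %u\n", vectorInstrCost(32, ~0u));
}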