diff options
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp | 90 |
1 files changed, 89 insertions, 1 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index aaed280a1270..616196ad5ba3 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -57,7 +57,7 @@ using namespace llvm; static cl::opt<unsigned> UnrollThresholdPrivate( "amdgpu-unroll-threshold-private", cl::desc("Unroll threshold for AMDGPU if private memory used in a loop"), - cl::init(2500), cl::Hidden); + cl::init(2000), cl::Hidden); static cl::opt<unsigned> UnrollThresholdLocal( "amdgpu-unroll-threshold-local", @@ -590,6 +590,61 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const { return false; } +bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, + Intrinsic::ID IID) const { + switch (IID) { + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_fadd: + case Intrinsic::amdgcn_ds_fmin: + case Intrinsic::amdgcn_ds_fmax: + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: + OpIndexes.push_back(0); + return true; + default: + return false; + } +} + +bool GCNTTIImpl::rewriteIntrinsicWithAddressSpace( + IntrinsicInst *II, Value *OldV, Value *NewV) const { + auto IntrID = II->getIntrinsicID(); + switch (IntrID) { + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: + case Intrinsic::amdgcn_ds_fadd: + case Intrinsic::amdgcn_ds_fmin: + case Intrinsic::amdgcn_ds_fmax: { + const ConstantInt *IsVolatile = cast<ConstantInt>(II->getArgOperand(4)); + if (!IsVolatile->isZero()) + return false; + Module *M = II->getParent()->getParent()->getParent(); + Type *DestTy = II->getType(); + Type *SrcTy = NewV->getType(); + Function *NewDecl = + Intrinsic::getDeclaration(M, II->getIntrinsicID(), {DestTy, SrcTy}); + II->setArgOperand(0, NewV); + II->setCalledFunction(NewDecl); + return true; + } + case Intrinsic::amdgcn_is_shared: + case Intrinsic::amdgcn_is_private: { + unsigned TrueAS = IntrID == Intrinsic::amdgcn_is_shared ? + AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS; + unsigned NewAS = NewV->getType()->getPointerAddressSpace(); + LLVMContext &Ctx = NewV->getType()->getContext(); + ConstantInt *NewVal = (TrueAS == NewAS) ? + ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); + II->replaceAllUsesWith(NewVal); + II->eraseFromParent(); + return true; + } + default: + return false; + } +} + unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { if (ST->hasVOP3PInsts()) { @@ -638,6 +693,39 @@ void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, CommonTTI.getUnrollingPreferences(L, SE, UP); } +unsigned GCNTTIImpl::getUserCost(const User *U, + ArrayRef<const Value *> Operands) { + // Estimate extractelement elimination + if (const ExtractElementInst *EE = dyn_cast<ExtractElementInst>(U)) { + ConstantInt *CI = dyn_cast<ConstantInt>(EE->getOperand(1)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return getVectorInstrCost(EE->getOpcode(), EE->getOperand(0)->getType(), + Idx); + } + + // Estimate insertelement elimination + if (const InsertElementInst *IE = dyn_cast<InsertElementInst>(U)) { + ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return getVectorInstrCost(IE->getOpcode(), IE->getType(), Idx); + } + + // Estimate different intrinsics, e.g. llvm.fabs + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { + SmallVector<Value *, 4> Args(II->arg_operands()); + FastMathFlags FMF; + if (auto *FPMO = dyn_cast<FPMathOperator>(II)) + FMF = FPMO->getFastMathFlags(); + return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args, + FMF); + } + return BaseT::getUserCost(U, Operands); +} + unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const { return 4 * 128; // XXX - 4 channels. Should these count as vector instead? } |