diff options
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPULibCalls.cpp')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPULibCalls.cpp | 151 |
1 files changed, 97 insertions, 54 deletions
diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 14e880042691..ce0a9db7c7f4 100644 --- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -1,9 +1,8 @@ //===- AMDGPULibCalls.cpp -------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -16,6 +15,7 @@ #include "AMDGPU.h" #include "AMDGPULibFunc.h" +#include "AMDGPUSubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Loads.h" #include "llvm/ADT/StringSet.h" @@ -23,6 +23,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" @@ -30,6 +31,7 @@ #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <vector> #include <cmath> @@ -66,6 +68,8 @@ private: typedef llvm::AMDGPULibFunc FuncInfo; + const TargetMachine *TM; + // -fuse-native. bool AllNative = false; @@ -73,7 +77,7 @@ private: // Return a pointer (pointer expr) to the function if function defintion with // "FuncName" exists. It may create a new function prototype in pre-link mode. - Constant *getFunction(Module *M, const FuncInfo& fInfo); + FunctionCallee getFunction(Module *M, const FuncInfo &fInfo); // Replace a normal function with its native version. bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo); @@ -135,12 +139,15 @@ private: // __read_pipe/__write_pipe bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo); + // llvm.amdgcn.wavefrontsize + bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B); + // Get insertion point at entry. BasicBlock::iterator getEntryIns(CallInst * UI); // Insert an Alloc instruction. AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix); // Get a scalar native builtin signle argument FP function - Constant* getNativeFunction(Module* M, const FuncInfo &FInfo); + FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo); protected: CallInst *CI; @@ -153,6 +160,8 @@ protected: } public: + AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {} + bool fold(CallInst *CI, AliasAnalysis *AA = nullptr); void initNativeFuncs(); @@ -167,15 +176,16 @@ namespace { class AMDGPUSimplifyLibCalls : public FunctionPass { - AMDGPULibCalls Simplifier; - const TargetOptions Options; + AMDGPULibCalls Simplifier; + public: static char ID; // Pass identification - AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions()) - : FunctionPass(ID), Options(Opt) { + AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions(), + const TargetMachine *TM = nullptr) + : FunctionPass(ID), Options(Opt), Simplifier(TM) { initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } @@ -217,19 +227,19 @@ INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative", false, false) template <typename IRB> -static CallInst *CreateCallEx(IRB &B, Value *Callee, Value *Arg, +static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name = "") { CallInst *R = B.CreateCall(Callee, Arg, Name); - if (Function* F = dyn_cast<Function>(Callee)) + if (Function *F = dyn_cast<Function>(Callee.getCallee())) R->setCallingConv(F->getCallingConv()); return R; } template <typename IRB> -static CallInst *CreateCallEx2(IRB &B, Value *Callee, Value *Arg1, Value *Arg2, - const Twine &Name = "") { +static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, + Value *Arg2, const Twine &Name = "") { CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name); - if (Function* F = dyn_cast<Function>(Callee)) + if (Function *F = dyn_cast<Function>(Callee.getCallee())) R->setCallingConv(F->getCallingConv()); return R; } @@ -472,7 +482,7 @@ static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) { return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType; } -Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) { +FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) { // If we are doing PreLinkOpt, the function is external. So it is safe to // use getOrInsertFunction() at this stage. @@ -519,11 +529,11 @@ bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) { nf.setPrefix(AMDGPULibFunc::NATIVE); nf.setId(AMDGPULibFunc::EI_SIN); - Constant *sinExpr = getFunction(M, nf); + FunctionCallee sinExpr = getFunction(M, nf); nf.setPrefix(AMDGPULibFunc::NATIVE); nf.setId(AMDGPULibFunc::EI_COS); - Constant *cosExpr = getFunction(M, nf); + FunctionCallee cosExpr = getFunction(M, nf); if (sinExpr && cosExpr) { Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI); Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI); @@ -555,7 +565,7 @@ bool AMDGPULibCalls::useNative(CallInst *aCI) { return sincosUseNative(aCI, FInfo); FInfo.setPrefix(AMDGPULibFunc::NATIVE); - Constant *F = getFunction(aCI->getModule(), FInfo); + FunctionCallee F = getFunction(aCI->getModule(), FInfo); if (!F) return false; @@ -613,7 +623,7 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, auto *FTy = FunctionType::get(Callee->getReturnType(), ArrayRef<Type *>(ArgTys), false); AMDGPULibFunc NewLibFunc(Name, FTy); - auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc); + FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc); if (!F) return false; @@ -640,14 +650,6 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { // Ignore indirect calls. if (Callee == 0) return false; - FuncInfo FInfo; - if (!parseFunctionName(Callee->getName(), &FInfo)) - return false; - - // Further check the number of arguments to see if they match. - if (CI->getNumArgOperands() != FInfo.getNumArgs()) - return false; - BasicBlock *BB = CI->getParent(); LLVMContext &Context = CI->getParent()->getContext(); IRBuilder<> B(Context); @@ -659,6 +661,21 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) { if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI)) B.setFastMathFlags(FPOp->getFastMathFlags()); + switch (Callee->getIntrinsicID()) { + default: + break; + case Intrinsic::amdgcn_wavefrontsize: + return !EnablePreLink && fold_wavefrontsize(CI, B); + } + + FuncInfo FInfo; + if (!parseFunctionName(Callee->getName(), &FInfo)) + return false; + + // Further check the number of arguments to see if they match. + if (CI->getNumArgOperands() != FInfo.getNumArgs()) + return false; + if (TDOFold(CI, FInfo)) return true; @@ -795,7 +812,7 @@ bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) { AMDGPULibFunc nf = FInfo; nf.setPrefix(AMDGPULibFunc::NATIVE); - if (Constant *FPExpr = getFunction(M, nf)) { + if (FunctionCallee FPExpr = getFunction(M, nf)) { LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> "); CI->setCalledFunction(FPExpr); @@ -848,7 +865,7 @@ bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B, namespace llvm { static double log2(double V) { -#if _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE || _POSIX_C_SOURCE >= 200112L +#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L return ::log2(V); #else return log(V) / 0.693147180559945309417; @@ -934,9 +951,10 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) { // pow[r](x, [-]0.5) = sqrt(x) bool issqrt = CF->isExactlyValue(0.5); - if (Constant *FPExpr = getFunction(M, - AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT - : AMDGPULibFunc::EI_RSQRT, FInfo))) { + if (FunctionCallee FPExpr = + getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT + : AMDGPULibFunc::EI_RSQRT, + FInfo))) { LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << FInfo.getName().c_str() << "(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt" @@ -1003,8 +1021,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, // powr ---> exp2(y * log2(x)) // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31)) - Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, - FInfo)); + FunctionCallee ExpExpr = + getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo)); if (!ExpExpr) return false; @@ -1090,8 +1108,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, Value *nval; if (needabs) { - Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, - FInfo)); + FunctionCallee AbsExpr = + getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo)); if (!AbsExpr) return false; nval = CreateCallEx(B, AbsExpr, opr0, "__fabs"); @@ -1099,8 +1117,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B, nval = cnval ? cnval : opr0; } if (needlog) { - Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, - FInfo)); + FunctionCallee LogExpr = + getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo)); if (!LogExpr) return false; nval = CreateCallEx(B,LogExpr, nval, "__log2"); @@ -1159,8 +1177,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, std::vector<const Type*> ParamsTys; ParamsTys.push_back(opr0->getType()); Module *M = CI->getModule(); - if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, - FInfo))) { + if (FunctionCallee FPExpr = + getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt"); replaceCall(nval); @@ -1168,8 +1186,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, } } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x) Module *M = CI->getModule(); - if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, - FInfo))) { + if (FunctionCallee FPExpr = + getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) { LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt"); replaceCall(nval); @@ -1186,8 +1204,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B, std::vector<const Type*> ParamsTys; ParamsTys.push_back(opr0->getType()); Module *M = CI->getModule(); - if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, - FInfo))) { + if (FunctionCallee FPExpr = + getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) { LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0 << ")\n"); Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt"); @@ -1243,7 +1261,8 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B, } // Get a scalar native builtin signle argument FP function -Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) { +FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M, + const FuncInfo &FInfo) { if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId())) return nullptr; FuncInfo nf = FInfo; @@ -1256,8 +1275,8 @@ bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo) { if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) && (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) { - if (Constant *FPExpr = getNativeFunction( - CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { + if (FunctionCallee FPExpr = getNativeFunction( + CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) { Value *opr0 = CI->getArgOperand(0); LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << "sqrt(" << *opr0 << ")\n"); @@ -1334,7 +1353,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, // function. AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo); nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS); - Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf)); + FunctionCallee Fsincos = getFunction(M, nf); if (!Fsincos) return false; BasicBlock::iterator ItOld = B.GetInsertPoint(); @@ -1342,7 +1361,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, B.SetInsertPoint(UI); Value *P = Alloc; - Type *PTy = Fsincos->getFunctionType()->getParamType(1); + Type *PTy = Fsincos.getFunctionType()->getParamType(1); // The allocaInst allocates the memory in private address space. This need // to be bitcasted to point to the address space of cos pointer type. // In OpenCL 2.0 this is generic, while in 1.2 that is private. @@ -1356,12 +1375,12 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, if (!isSin) { // CI->cos, UI->sin B.SetInsertPoint(&*ItOld); UI->replaceAllUsesWith(&*Call); - Instruction *Reload = B.CreateLoad(Alloc); + Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); CI->replaceAllUsesWith(Reload); UI->eraseFromParent(); CI->eraseFromParent(); } else { // CI->sin, UI->cos - Instruction *Reload = B.CreateLoad(Alloc); + Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc); UI->replaceAllUsesWith(Reload); CI->replaceAllUsesWith(Call); UI->eraseFromParent(); @@ -1370,6 +1389,29 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B, return true; } +bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) { + if (!TM) + return false; + + StringRef CPU = TM->getTargetCPU(); + StringRef Features = TM->getTargetFeatureString(); + if ((CPU.empty() || CPU.equals_lower("generic")) && + (Features.empty() || + Features.find_lower("wavefrontsize") == StringRef::npos)) + return false; + + Function *F = CI->getParent()->getParent(); + const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F); + unsigned N = ST.getWavefrontSize(); + + LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with " + << N << "\n"); + + CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N)); + CI->eraseFromParent(); + return true; +} + // Get insertion point at entry. BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) { Function * Func = UI->getParent()->getParent(); @@ -1679,8 +1721,9 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) { } // Public interface to the Simplify LibCalls pass. -FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) { - return new AMDGPUSimplifyLibCalls(Opt); +FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt, + const TargetMachine *TM) { + return new AMDGPUSimplifyLibCalls(Opt, TM); } FunctionPass *llvm::createAMDGPUUseNativeCallsPass() { |