1 files changed, 97 insertions, 54 deletions
diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 14e880042691..ce0a9db7c7f4 100644
--- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1,9 +1,8 @@
 //===- AMDGPULibCalls.cpp -------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,6 +15,7 @@
 
 #include "AMDGPU.h"
 #include "AMDGPULibFunc.h"
+#include "AMDGPUSubtarget.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/ADT/StringSet.h"
@@ -23,6 +23,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/LLVMContext.h"
@@ -30,6 +31,7 @@
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include <vector>
 #include <cmath>
@@ -66,6 +68,8 @@ private:
 
   typedef llvm::AMDGPULibFunc FuncInfo;
 
+  const TargetMachine *TM;
+
   // -fuse-native.
   bool AllNative = false;
 
@@ -73,7 +77,7 @@ private:
 
   // Return a pointer (pointer expr) to the function if function defintion with
   // "FuncName" exists. It may create a new function prototype in pre-link mode.
-  Constant *getFunction(Module *M, const FuncInfo& fInfo);
+  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
 
   // Replace a normal function with its native version.
   bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
@@ -135,12 +139,15 @@ private:
   // __read_pipe/__write_pipe
   bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
 
+  // llvm.amdgcn.wavefrontsize
+  bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
+
   // Get insertion point at entry.
   BasicBlock::iterator getEntryIns(CallInst * UI);
   // Insert an Alloc instruction.
   AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
   // Get a scalar native builtin signle argument FP function
-  Constant* getNativeFunction(Module* M, const FuncInfo &FInfo);
+  FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
 
 protected:
   CallInst *CI;
@@ -153,6 +160,8 @@ protected:
   }
 
 public:
+  AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
+
   bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
 
   void initNativeFuncs();
@@ -167,15 +176,16 @@ namespace {
 
   class AMDGPUSimplifyLibCalls : public FunctionPass {
 
-  AMDGPULibCalls Simplifier;
-
   const TargetOptions Options;
 
+  AMDGPULibCalls Simplifier;
+
   public:
     static char ID; // Pass identification
 
-    AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
-      : FunctionPass(ID), Options(Opt) {
+    AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions(),
+                           const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), Options(Opt), Simplifier(TM) {
       initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
     }
 
@@ -217,19 +227,19 @@ INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
                 false, false)
 
 template <typename IRB>
-static CallInst *CreateCallEx(IRB &B, Value *Callee, Value *Arg,
+static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
                               const Twine &Name = "") {
   CallInst *R = B.CreateCall(Callee, Arg, Name);
-  if (Function* F = dyn_cast<Function>(Callee))
+  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
     R->setCallingConv(F->getCallingConv());
   return R;
 }
 
 template <typename IRB>
-static CallInst *CreateCallEx2(IRB &B, Value *Callee, Value *Arg1, Value *Arg2,
-                               const Twine &Name = "") {
+static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
+                               Value *Arg2, const Twine &Name = "") {
   CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
-  if (Function* F = dyn_cast<Function>(Callee))
+  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
     R->setCallingConv(F->getCallingConv());
   return R;
 }
@@ -472,7 +482,7 @@ static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
   return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
 }
 
-Constant *AMDGPULibCalls::getFunction(Module *M, const FuncInfo& fInfo) {
+FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
   // If we are doing PreLinkOpt, the function is external. So it is safe to
   // use getOrInsertFunction() at this stage.
 
@@ -519,11 +529,11 @@ bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
 
     nf.setPrefix(AMDGPULibFunc::NATIVE);
     nf.setId(AMDGPULibFunc::EI_SIN);
-    Constant *sinExpr = getFunction(M, nf);
+    FunctionCallee sinExpr = getFunction(M, nf);
 
     nf.setPrefix(AMDGPULibFunc::NATIVE);
     nf.setId(AMDGPULibFunc::EI_COS);
-    Constant *cosExpr = getFunction(M, nf);
+    FunctionCallee cosExpr = getFunction(M, nf);
     if (sinExpr && cosExpr) {
       Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
       Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
@@ -555,7 +565,7 @@ bool AMDGPULibCalls::useNative(CallInst *aCI) {
     return sincosUseNative(aCI, FInfo);
 
   FInfo.setPrefix(AMDGPULibFunc::NATIVE);
-  Constant *F = getFunction(aCI->getModule(), FInfo);
+  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
   if (!F)
     return false;
 
@@ -613,7 +623,7 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
   auto *FTy = FunctionType::get(Callee->getReturnType(),
                                 ArrayRef<Type *>(ArgTys), false);
   AMDGPULibFunc NewLibFunc(Name, FTy);
-  auto *F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
+  FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
   if (!F)
     return false;
 
@@ -640,14 +650,6 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
   // Ignore indirect calls.
   if (Callee == 0) return false;
 
-  FuncInfo FInfo;
-  if (!parseFunctionName(Callee->getName(), &FInfo))
-    return false;
-
-  // Further check the number of arguments to see if they match.
-  if (CI->getNumArgOperands() != FInfo.getNumArgs())
-    return false;
-
   BasicBlock *BB = CI->getParent();
   LLVMContext &Context = CI->getParent()->getContext();
   IRBuilder<> B(Context);
@@ -659,6 +661,21 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
   if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
     B.setFastMathFlags(FPOp->getFastMathFlags());
 
+  switch (Callee->getIntrinsicID()) {
+  default:
+    break;
+  case Intrinsic::amdgcn_wavefrontsize:
+    return !EnablePreLink && fold_wavefrontsize(CI, B);
+  }
+
+  FuncInfo FInfo;
+  if (!parseFunctionName(Callee->getName(), &FInfo))
+    return false;
+
+  // Further check the number of arguments to see if they match.
+  if (CI->getNumArgOperands() != FInfo.getNumArgs())
+    return false;
+
   if (TDOFold(CI, FInfo))
     return true;
 
@@ -795,7 +812,7 @@ bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
 
   AMDGPULibFunc nf = FInfo;
   nf.setPrefix(AMDGPULibFunc::NATIVE);
-  if (Constant *FPExpr = getFunction(M, nf)) {
+  if (FunctionCallee FPExpr = getFunction(M, nf)) {
     LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
 
     CI->setCalledFunction(FPExpr);
@@ -848,7 +865,7 @@ bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
 
 namespace llvm {
 static double log2(double V) {
-#if _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE || _POSIX_C_SOURCE >= 200112L
+#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
   return ::log2(V);
 #else
   return log(V) / 0.693147180559945309417;
@@ -934,9 +951,10 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
   if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
     // pow[r](x, [-]0.5) = sqrt(x)
     bool issqrt = CF->isExactlyValue(0.5);
-    if (Constant *FPExpr = getFunction(M,
-        AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
-                             : AMDGPULibFunc::EI_RSQRT, FInfo))) {
+    if (FunctionCallee FPExpr =
+            getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
+                                                : AMDGPULibFunc::EI_RSQRT,
+                                         FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                         << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
@@ -1003,8 +1021,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
 
   // powr ---> exp2(y * log2(x))
   // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
-  Constant *ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2,
-                                                   FInfo));
+  FunctionCallee ExpExpr =
+      getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
   if (!ExpExpr)
     return false;
 
@@ -1090,8 +1108,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
 
   Value *nval;
   if (needabs) {
-    Constant *AbsExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS,
-                                                     FInfo));
+    FunctionCallee AbsExpr =
+        getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
     if (!AbsExpr)
       return false;
     nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
@@ -1099,8 +1117,8 @@ bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
     nval = cnval ? cnval : opr0;
   }
   if (needlog) {
-    Constant *LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2,
-                                                     FInfo));
+    FunctionCallee LogExpr =
+        getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
     if (!LogExpr)
       return false;
     nval = CreateCallEx(B,LogExpr, nval, "__log2");
@@ -1159,8 +1177,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
     std::vector<const Type*> ParamsTys;
     ParamsTys.push_back(opr0->getType());
     Module *M = CI->getModule();
-    if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT,
-                                                        FInfo))) {
+    if (FunctionCallee FPExpr =
+            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
       replaceCall(nval);
@@ -1168,8 +1186,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
     }
   } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
     Module *M = CI->getModule();
-    if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT,
-                                                        FInfo))) {
+    if (FunctionCallee FPExpr =
+            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
       replaceCall(nval);
@@ -1186,8 +1204,8 @@ bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
     std::vector<const Type*> ParamsTys;
     ParamsTys.push_back(opr0->getType());
     Module *M = CI->getModule();
-    if (Constant *FPExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT,
-                                                        FInfo))) {
+    if (FunctionCallee FPExpr =
+            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
                         << ")\n");
       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
@@ -1243,7 +1261,8 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
 }
 
 // Get a scalar native builtin signle argument FP function
-Constant* AMDGPULibCalls::getNativeFunction(Module* M, const FuncInfo& FInfo) {
+FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
+                                                 const FuncInfo &FInfo) {
   if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
     return nullptr;
   FuncInfo nf = FInfo;
@@ -1256,8 +1275,8 @@ bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
                                const FuncInfo &FInfo) {
   if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
       (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
-    if (Constant *FPExpr = getNativeFunction(
-        CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
+    if (FunctionCallee FPExpr = getNativeFunction(
+            CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
       Value *opr0 = CI->getArgOperand(0);
       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
                         << "sqrt(" << *opr0 << ")\n");
@@ -1334,7 +1353,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
   // function.
   AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
   nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
-  Function *Fsincos = dyn_cast_or_null<Function>(getFunction(M, nf));
+  FunctionCallee Fsincos = getFunction(M, nf);
   if (!Fsincos) return false;
 
   BasicBlock::iterator ItOld = B.GetInsertPoint();
@@ -1342,7 +1361,7 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
   B.SetInsertPoint(UI);
 
   Value *P = Alloc;
-  Type *PTy = Fsincos->getFunctionType()->getParamType(1);
+  Type *PTy = Fsincos.getFunctionType()->getParamType(1);
   // The allocaInst allocates the memory in private address space. This need
   // to be bitcasted to point to the address space of cos pointer type.
   // In OpenCL 2.0 this is generic, while in 1.2 that is private.
@@ -1356,12 +1375,12 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
   if (!isSin) { // CI->cos, UI->sin
     B.SetInsertPoint(&*ItOld);
     UI->replaceAllUsesWith(&*Call);
-    Instruction *Reload = B.CreateLoad(Alloc);
+    Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
     CI->replaceAllUsesWith(Reload);
     UI->eraseFromParent();
     CI->eraseFromParent();
   } else { // CI->sin, UI->cos
-    Instruction *Reload = B.CreateLoad(Alloc);
+    Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
     UI->replaceAllUsesWith(Reload);
     CI->replaceAllUsesWith(Call);
     UI->eraseFromParent();
@@ -1370,6 +1389,29 @@ bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
   return true;
 }
 
+bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
+  if (!TM)
+    return false;
+
+  StringRef CPU = TM->getTargetCPU();
+  StringRef Features = TM->getTargetFeatureString();
+  if ((CPU.empty() || CPU.equals_lower("generic")) &&
+      (Features.empty() ||
+       Features.find_lower("wavefrontsize") == StringRef::npos))
+    return false;
+
+  Function *F = CI->getParent()->getParent();
+  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
+  unsigned N = ST.getWavefrontSize();
+
+  LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
+               << N << "\n");
+
+  CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
+  CI->eraseFromParent();
+  return true;
+}
+
 // Get insertion point at entry.
 BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
   Function * Func = UI->getParent()->getParent();
@@ -1679,8 +1721,9 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
 }
 
 // Public interface to the Simplify LibCalls pass.
-FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
-  return new AMDGPUSimplifyLibCalls(Opt);
+FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt,
+                                                     const TargetMachine *TM) {
+  return new AMDGPUSimplifyLibCalls(Opt, TM);
 }
 
 FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {