diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 69 |
1 files changed, 49 insertions, 20 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index e72b3f4fde63..625074569cfa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -21,7 +21,6 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -71,7 +70,8 @@ public: static bool visitConstantExpr(const ConstantExpr *CE); static bool visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet<const Constant *, 8> &ConstantExprVisited); + SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc, + bool HasApertureRegs); }; } // end anonymous namespace @@ -93,6 +93,14 @@ static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { return castRequiresQueuePtr(ASC->getSrcAddressSpace()); } +static bool isDSAddress(const Constant *C) { + const GlobalValue *GV = dyn_cast<GlobalValue>(C); + if (!GV) + return false; + unsigned AS = GV->getAddressSpace(); + return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; +} + bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { if (CE->getOpcode() == Instruction::AddrSpaceCast) { unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); @@ -104,7 +112,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet<const Constant *, 8> &ConstantExprVisited) { + SmallPtrSet<const Constant *, 8> &ConstantExprVisited, + bool IsFunc, bool HasApertureRegs) { if (!ConstantExprVisited.insert(EntryC).second) return false; @@ -115,9 +124,13 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( while (!Stack.empty()) { const Constant *C = Stack.pop_back_val(); + // We need to trap on DS globals in non-entry functions. + if (IsFunc && isDSAddress(C)) + return true; + // Check this constant expression. if (const auto *CE = dyn_cast<ConstantExpr>(C)) { - if (visitConstantExpr(CE)) + if (!HasApertureRegs && visitConstantExpr(CE)) return true; } @@ -202,7 +215,7 @@ static void copyFeaturesToFunction(Function &Parent, const Function &Callee, "amdgpu-work-item-id-z", "amdgpu-work-group-id-x", "amdgpu-work-group-id-y", "amdgpu-work-group-id-z", "amdgpu-dispatch-ptr", "amdgpu-dispatch-id", - "amdgpu-kernarg-segment-ptr", "amdgpu-implicitarg-ptr"}; + "amdgpu-implicitarg-ptr"}; if (handleAttr(Parent, Callee, "amdgpu-queue-ptr")) NeedQueuePtr = true; @@ -263,10 +276,10 @@ bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute( bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F); - bool HasFlat = ST.hasFlatAddressSpace(); bool HasApertureRegs = ST.hasApertureRegs(); SmallPtrSet<const Constant *, 8> ConstantExprVisited; + bool HaveStackObjects = false; bool Changed = false; bool NeedQueuePtr = false; bool HaveCall = false; @@ -274,13 +287,18 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { for (BasicBlock &BB : F) { for (Instruction &I : BB) { - CallSite CS(&I); - if (CS) { - Function *Callee = CS.getCalledFunction(); + if (isa<AllocaInst>(I)) { + HaveStackObjects = true; + continue; + } + + if (auto *CB = dyn_cast<CallBase>(&I)) { + const Function *Callee = + dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts()); // TODO: Do something with indirect calls. if (!Callee) { - if (!CS.isInlineAsm()) + if (!CB->isInlineAsm()) HaveCall = true; continue; } @@ -292,20 +310,25 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { Changed = true; } else { bool NonKernelOnly = false; - StringRef AttrName = intrinsicToAttrName(IID, - NonKernelOnly, NeedQueuePtr); - if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) { - F.addFnAttr(AttrName); - Changed = true; + + if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) { + F.addFnAttr("amdgpu-kernarg-segment-ptr"); + } else { + StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly, + NeedQueuePtr); + if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) { + F.addFnAttr(AttrName); + Changed = true; + } } } } - if (NeedQueuePtr || HasApertureRegs) + if (NeedQueuePtr || (!IsFunc && HasApertureRegs)) continue; if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) { - if (castRequiresQueuePtr(ASC)) { + if (!HasApertureRegs && castRequiresQueuePtr(ASC)) { NeedQueuePtr = true; continue; } @@ -316,7 +339,8 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { if (!OpC) continue; - if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) { + if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc, + HasApertureRegs)) { NeedQueuePtr = true; break; } @@ -332,8 +356,13 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { // TODO: We could refine this to captured pointers that could possibly be // accessed by flat instructions. For now this is mostly a poor way of // estimating whether there are calls before argument lowering. - if (HasFlat && !IsFunc && HaveCall) { - F.addFnAttr("amdgpu-flat-scratch"); + if (!IsFunc && HaveCall) { + F.addFnAttr("amdgpu-calls"); + Changed = true; + } + + if (HaveStackObjects) { + F.addFnAttr("amdgpu-stack-objects"); Changed = true; } |