1 files changed, 13 insertions, 32 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 35922341de26..b4a8766d682e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -55,21 +55,6 @@ static cl::opt<bool> SuperAlignLDSGlobals(
     cl::init(true), cl::Hidden);
 
 namespace {
-
-SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M) {
-  SmallPtrSet<GlobalValue *, 32> UsedList;
-
-  SmallVector<GlobalValue *, 32> TmpVec;
-  collectUsedGlobalVariables(M, TmpVec, true);
-  UsedList.insert(TmpVec.begin(), TmpVec.end());
-
-  TmpVec.clear();
-  collectUsedGlobalVariables(M, TmpVec, false);
-  UsedList.insert(TmpVec.begin(), TmpVec.end());
-
-  return UsedList;
-}
-
 class AMDGPULowerModuleLDS : public ModulePass {
 
   static void removeFromUsedList(Module &M, StringRef Name,
@@ -153,9 +138,6 @@ class AMDGPULowerModuleLDS : public ModulePass {
                        "");
   }
 
-private:
-  SmallPtrSet<GlobalValue *, 32> UsedList;
-
 public:
   static char ID;
 
@@ -165,9 +147,10 @@ public:
 
   bool runOnModule(Module &M) override {
     CallGraph CG = CallGraph(M);
-    UsedList = getUsedList(M);
     bool Changed = superAlignLDSGlobals(M);
-    Changed |= processUsedLDS(CG, M);
+    std::vector<GlobalVariable *> ModuleScopeVariables =
+        AMDGPU::findVariablesToLower(M, nullptr);
+    Changed |= processUsedLDS(CG, M, ModuleScopeVariables);
 
     for (Function &F : M.functions()) {
       if (F.isDeclaration())
@@ -176,10 +159,11 @@ public:
       // Only lower compute kernels' LDS.
       if (!AMDGPU::isKernel(F.getCallingConv()))
         continue;
-      Changed |= processUsedLDS(CG, M, &F);
+      std::vector<GlobalVariable *> KernelUsedVariables =
+          AMDGPU::findVariablesToLower(M, &F);
+      Changed |= processUsedLDS(CG, M, KernelUsedVariables, &F);
     }
 
-    UsedList.clear();
     return Changed;
   }
 
@@ -228,22 +212,20 @@ private:
     return Changed;
   }
 
-  bool processUsedLDS(CallGraph const &CG, Module &M, Function *F = nullptr) {
+  bool processUsedLDS(CallGraph const &CG, Module &M,
+                      std::vector<GlobalVariable *> const &LDSVarsToTransform,
+                      Function *F = nullptr) {
     LLVMContext &Ctx = M.getContext();
     const DataLayout &DL = M.getDataLayout();
 
-    // Find variables to move into new struct instance
-    std::vector<GlobalVariable *> FoundLocalVars =
-        AMDGPU::findVariablesToLower(M, F);
-
-    if (FoundLocalVars.empty()) {
+    if (LDSVarsToTransform.empty()) {
       // No variables to rewrite, no changes made.
       return false;
     }
 
     SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
-    LayoutFields.reserve(FoundLocalVars.size());
-    for (GlobalVariable *GV : FoundLocalVars) {
+    LayoutFields.reserve(LDSVarsToTransform.size());
+    for (GlobalVariable *GV : LDSVarsToTransform) {
       OptimizedStructLayoutField F(GV, DL.getTypeAllocSize(GV->getValueType()),
                                    AMDGPU::getAlign(DL, GV));
       LayoutFields.emplace_back(F);
@@ -252,7 +234,7 @@ private:
     performOptimizedStructLayout(LayoutFields);
 
     std::vector<GlobalVariable *> LocalVars;
-    LocalVars.reserve(FoundLocalVars.size()); // will be at least this large
+    LocalVars.reserve(LDSVarsToTransform.size()); // will be at least this large
     {
       // This usually won't need to insert any padding, perhaps avoid the alloc
       uint64_t CurrentOffset = 0;
@@ -352,7 +334,6 @@ private:
         GV->replaceAllUsesWith(GEP);
       }
       if (GV->use_empty()) {
-        UsedList.erase(GV);
         GV->eraseFromParent();
       }