Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp  36
1 file changed, 28 insertions(+), 8 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 64acd6efe028..717145b7af53 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -7,17 +7,20 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMachineFunction.h"
-#include "AMDGPUSubtarget.h"
#include "AMDGPUPerfHintAnalysis.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
- MachineFunctionInfo(),
- Mode(MF.getFunction()),
- IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
- NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
+AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF)
+ : MachineFunctionInfo(), Mode(MF.getFunction()),
+ IsEntryFunction(
+ AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())),
+ IsModuleEntryFunction(
+ AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
+ NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
// FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
@@ -49,10 +52,27 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
/// TODO: We should sort these to minimize wasted space due to alignment
/// padding. Currently the padding is decided by the first encountered use
/// during lowering.
- unsigned Offset = LDSSize = alignTo(LDSSize, Alignment);
+ unsigned Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
Entry.first->second = Offset;
- LDSSize += DL.getTypeAllocSize(GV.getValueType());
+ StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
+
+ // Update the LDS size considering the padding to align the dynamic shared
+ // memory.
+ LDSSize = alignTo(StaticLDSSize, DynLDSAlign);
return Offset;
}
+
+void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
+ const GlobalVariable &GV) {
+ assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
+
+ Align Alignment =
+ DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
+ if (Alignment <= DynLDSAlign)
+ return;
+
+ LDSSize = alignTo(StaticLDSSize, Alignment);
+ DynLDSAlign = Alignment;
+}
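
In effect, the patch now tracks two quantities: StaticLDSSize, the bytes occupied by statically allocated LDS globals, and LDSSize, which additionally pads StaticLDSSize out to DynLDSAlign so that dynamically sized shared memory appended after the static allocations starts suitably aligned. The following standalone C++ sketch is illustrative only; the struct and the names allocateStatic and setDynAlign are hypothetical stand-ins, not the LLVM API, and alignTo is a local helper mirroring llvm::alignTo for power-of-two alignments.

#include <cassert>
#include <cstdint>
#include <iostream>

// Round Value up to the next multiple of a power-of-two alignment.
static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "alignment must be a power of two");
  return (Value + Align - 1) & ~(Align - 1);
}

struct LDSLayout {
  uint64_t StaticLDSSize = 0; // bytes used by statically allocated LDS globals
  uint64_t LDSSize = 0;       // total size, padded for the dynamic LDS region
  uint64_t DynLDSAlign = 1;   // strictest alignment seen for dynamic LDS

  // Roughly the arithmetic in allocateLDSGlobal: place one static global,
  // then keep the total size padded out to the dynamic-LDS alignment.
  uint64_t allocateStatic(uint64_t Size, uint64_t Align) {
    uint64_t Offset = StaticLDSSize = alignTo(StaticLDSSize, Align);
    StaticLDSSize += Size;
    LDSSize = alignTo(StaticLDSSize, DynLDSAlign);
    return Offset;
  }

  // Roughly the arithmetic in setDynLDSAlign: record a stricter alignment
  // for the zero-sized dynamic LDS variable and re-pad the total size.
  void setDynAlign(uint64_t Align) {
    if (Align <= DynLDSAlign)
      return;
    LDSSize = alignTo(StaticLDSSize, Align);
    DynLDSAlign = Align;
  }
};

int main() {
  LDSLayout L;
  L.allocateStatic(/*Size=*/12, /*Align=*/4); // one static global at offset 0
  L.setDynAlign(16);                          // dynamic LDS wants 16-byte alignment
  // Static data ends at byte 12; the dynamic region starts at the next
  // 16-byte boundary, so the reported total becomes 16.
  std::cout << L.StaticLDSSize << " " << L.LDSSize << "\n"; // prints "12 16"
}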