aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/clang/lib/CodeGen/Targets/AMDGPU.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/Targets/AMDGPU.cpp')
-rw-r--r-- contrib/llvm-project/clang/lib/CodeGen/Targets/AMDGPU.cpp 56
1 files changed, 48 insertions, 8 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/Targets/AMDGPU.cpp b/contrib/llvm-project/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 03ac6b78598f..4d3275e17c38 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -45,11 +45,12 @@ public:
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
- ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;
+ ABIArgInfo classifyArgumentType(QualType Ty, bool Variadic,
+ unsigned &NumRegsLeft) const;
void computeInfo(CGFunctionInfo &FI) const override;
- Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
- QualType Ty) const override;
+ RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
+ AggValueSlot Slot) const override;
};
bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
@@ -103,19 +104,27 @@ void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
+ unsigned ArgumentIndex = 0;
+ const unsigned numFixedArguments = FI.getNumRequiredArgs();
+
unsigned NumRegsLeft = MaxNumRegsForArgsRet;
for (auto &Arg : FI.arguments()) {
if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
Arg.info = classifyKernelArgumentType(Arg.type);
} else {
- Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
+ bool FixedArgument = ArgumentIndex++ < numFixedArguments;
+ Arg.info = classifyArgumentType(Arg.type, !FixedArgument, NumRegsLeft);
}
}
}
-Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
- QualType Ty) const {
- llvm_unreachable("AMDGPU does not support varargs");
+RValue AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
+ QualType Ty, AggValueSlot Slot) const {
+ const bool IsIndirect = false;
+ const bool AllowHigherAlign = false;
+ return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
+ getContext().getTypeInfoInChars(Ty),
+ CharUnits::fromQuantity(4), AllowHigherAlign, Slot);
}
ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
@@ -197,12 +206,20 @@ ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
-ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
+ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic,
unsigned &NumRegsLeft) const {
assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
Ty = useFirstFieldIfTransparentUnion(Ty);
+ if (Variadic) {
+ return ABIArgInfo::getDirect(/*T=*/nullptr,
+ /*Offset=*/0,
+ /*Padding=*/nullptr,
+ /*CanBeFlattened=*/false,
+ /*Align=*/0);
+ }
+
if (isAggregateTypeForABI(Ty)) {
// Records with non-trivial destructors/copy-constructors should not be
// passed by value.
@@ -356,6 +373,29 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
+
+ if (const auto *Attr = FD->getAttr<AMDGPUMaxNumWorkGroupsAttr>()) {
+ uint32_t X = Attr->getMaxNumWorkGroupsX()
+ ->EvaluateKnownConstInt(M.getContext())
+ .getExtValue();
+ // Y and Z dimensions default to 1 if not specified
+ uint32_t Y = Attr->getMaxNumWorkGroupsY()
+ ? Attr->getMaxNumWorkGroupsY()
+ ->EvaluateKnownConstInt(M.getContext())
+ .getExtValue()
+ : 1;
+ uint32_t Z = Attr->getMaxNumWorkGroupsZ()
+ ? Attr->getMaxNumWorkGroupsZ()
+ ->EvaluateKnownConstInt(M.getContext())
+ .getExtValue()
+ : 1;
+
+ llvm::SmallString<32> AttrVal;
+ llvm::raw_svector_ostream OS(AttrVal);
+ OS << X << ',' << Y << ',' << Z;
+
+ F->addFnAttr("amdgpu-max-num-workgroups", AttrVal.str());
+ }
}
/// Emits control constants used to change per-architecture behaviour in the