diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 28 |
1 file changed, 18 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 8eeda7b67b73..b9c59f4c615a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -113,7 +113,7 @@ MetadataStreamerV2::getAddressSpaceQualifier( ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const { - if (TypeQual.find("pipe") != StringRef::npos) + if (TypeQual.contains("pipe")) return ValueKind::Pipe; return StringSwitch<ValueKind>(BaseTypeName) @@ -201,10 +201,11 @@ MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF, Align MaxKernArgAlign; HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F, MaxKernArgAlign); + HSACodeProps.mKernargSegmentAlign = + std::max(MaxKernArgAlign, Align(4)).value(); + HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; - HSACodeProps.mKernargSegmentAlign = - std::max(MaxKernArgAlign, Align(4)).value(); HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR; HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR; @@ -533,7 +534,7 @@ MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const { StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual, StringRef BaseTypeName) const { - if (TypeQual.find("pipe") != StringRef::npos) + if (TypeQual.contains("pipe")) return "pipe"; return StringSwitch<StringRef>(BaseTypeName) @@ -665,6 +666,10 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func, Func.getFnAttribute("runtime-handle").getValueAsString().str(), /*Copy=*/true); } + if (Func.hasFnAttribute("device-init")) + Kern[".kind"] = Kern.getDocument()->getNode("init"); + else if (Func.hasFnAttribute("device-fini")) + Kern[".kind"] = Kern.getDocument()->getNode("fini"); } void MetadataStreamerV3::emitKernelArgs(const Function 
&Func, @@ -794,7 +799,8 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, if (!HiddenArgNumBytes) return; - auto &DL = Func.getParent()->getDataLayout(); + const Module *M = Func.getParent(); + auto &DL = M->getDataLayout(); auto Int64Ty = Type::getInt64Ty(Func.getContext()); if (HiddenArgNumBytes >= 8) @@ -810,16 +816,16 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func, auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); - // Emit "printf buffer" argument if printf is used, otherwise emit dummy - // "none" argument. + // Emit "printf buffer" argument if printf is used, emit "hostcall buffer" + // if "hostcall" module flag is set, otherwise emit dummy "none" argument. if (HiddenArgNumBytes >= 32) { - if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) + if (M->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, Args); - else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) { + else if (M->getModuleFlag("amdgpu_hostcall")) { // The printf runtime binding pass should have ensured that hostcall and // printf are not used in the same module. - assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); + assert(!M->getNamedMetadata("llvm.printf.fmts")); emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, Args); } else @@ -862,6 +868,8 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF, Kern.getDocument()->getNode(ProgramInfo.LDSSize); Kern[".private_segment_fixed_size"] = Kern.getDocument()->getNode(ProgramInfo.ScratchSize); + + // FIXME: The metadata treats the minimum as 16? Kern[".kernarg_segment_align"] = Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value()); Kern[".wavefront_size"] = |
