diff options
Diffstat (limited to 'lib/Driver/ToolChains/HIP.cpp')
-rw-r--r-- | lib/Driver/ToolChains/HIP.cpp | 59 |
1 files changed, 54 insertions, 5 deletions
diff --git a/lib/Driver/ToolChains/HIP.cpp b/lib/Driver/ToolChains/HIP.cpp index 03acf45a9b313..868765cf88e5a 100644 --- a/lib/Driver/ToolChains/HIP.cpp +++ b/lib/Driver/ToolChains/HIP.cpp @@ -24,6 +24,12 @@ using namespace clang::driver::tools; using namespace clang; using namespace llvm::opt; +#if _WIN32 || _WIN64 +#define NULL_FILE "nul" +#else +#define NULL_FILE "/dev/null" +#endif + namespace { static void addBCLib(Compilation &C, const ArgList &Args, @@ -81,8 +87,8 @@ const char *AMDGCN::Linker::constructLLVMLinkCommand( else FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc"; - BCLibs.append({"opencl.amdgcn.bc", - "ocml.amdgcn.bc", "ockl.amdgcn.bc", "irif.amdgcn.bc", + BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", + "ocml.amdgcn.bc", "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc", FlushDenormalControlBC, "oclc_correctly_rounded_sqrt_on.amdgcn.bc", @@ -154,7 +160,7 @@ const char *AMDGCN::Linker::constructLlcCommand( llvm::StringRef OutputFilePrefix, const char *InputFileName) const { // Construct llc command. ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa", - "-filetype=obj", + "-filetype=obj", "-mattr=-code-object-v3", Args.MakeArgString("-mcpu=" + SubArchName), "-o"}; std::string LlcOutputFileName = C.getDriver().GetTemporaryPath(OutputFilePrefix, "o"); @@ -184,6 +190,40 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, C.addCommand(llvm::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs)); } +// Construct a clang-offload-bundler command to bundle code objects for +// different GPU's into a HIP fat binary. +void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, + StringRef OutputFileName, const InputInfoList &Inputs, + const llvm::opt::ArgList &Args, const Tool& T) { + // Construct clang-offload-bundler command to bundle object files for + // for different GPU archs. + ArgStringList BundlerArgs; + BundlerArgs.push_back(Args.MakeArgString("-type=o")); + + // ToDo: Remove the dummy host binary entry which is required by + // clang-offload-bundler. + std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux"; + std::string BundlerInputArg = "-inputs=" NULL_FILE; + + for (const auto &II : Inputs) { + const auto* A = II.getAction(); + BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" + + StringRef(A->getOffloadingArch()).str(); + BundlerInputArg = BundlerInputArg + "," + II.getFilename(); + } + BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg)); + BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg)); + + auto BundlerOutputArg = + Args.MakeArgString(std::string("-outputs=").append(OutputFileName)); + BundlerArgs.push_back(BundlerOutputArg); + + SmallString<128> BundlerPath(C.getDriver().Dir); + llvm::sys::path::append(BundlerPath, "clang-offload-bundler"); + const char *Bundler = Args.MakeArgString(BundlerPath); + C.addCommand(llvm::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs)); +} + // For amdgcn the inputs of the linker job are device bitcode and output is // object file. It calls llvm-link, opt, llc, then lld steps. void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -192,6 +232,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, const ArgList &Args, const char *LinkingOutput) const { + if (JA.getType() == types::TY_HIP_FATBIN) + return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this); + assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn && "Unsupported target"); @@ -244,9 +287,15 @@ void HIPToolChain::addClangTargetOptions( options::OPT_fno_cuda_approx_transcendentals, false)) CC1Args.push_back("-fcuda-approx-transcendentals"); - if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc, + if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) - CC1Args.push_back("-fcuda-rdc"); + CC1Args.push_back("-fgpu-rdc"); + + // Default to "hidden" visibility, as object level linking will not be + // supported for the foreseeable future. + if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, + options::OPT_fvisibility_ms_compat)) + CC1Args.append({"-fvisibility", "hidden"}); } llvm::opt::DerivedArgList * |