summaryrefslogtreecommitdiff
path: root/lib/Driver/ToolChains/HIP.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Driver/ToolChains/HIP.cpp')
-rw-r--r--lib/Driver/ToolChains/HIP.cpp59
1 files changed, 54 insertions, 5 deletions
diff --git a/lib/Driver/ToolChains/HIP.cpp b/lib/Driver/ToolChains/HIP.cpp
index 03acf45a9b313..868765cf88e5a 100644
--- a/lib/Driver/ToolChains/HIP.cpp
+++ b/lib/Driver/ToolChains/HIP.cpp
@@ -24,6 +24,12 @@ using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
+#if _WIN32 || _WIN64
+#define NULL_FILE "nul"
+#else
+#define NULL_FILE "/dev/null"
+#endif
+
namespace {
static void addBCLib(Compilation &C, const ArgList &Args,
@@ -81,8 +87,8 @@ const char *AMDGCN::Linker::constructLLVMLinkCommand(
else
FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc";
- BCLibs.append({"opencl.amdgcn.bc",
- "ocml.amdgcn.bc", "ockl.amdgcn.bc", "irif.amdgcn.bc",
+ BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc",
+ "ocml.amdgcn.bc", "ockl.amdgcn.bc",
"oclc_finite_only_off.amdgcn.bc",
FlushDenormalControlBC,
"oclc_correctly_rounded_sqrt_on.amdgcn.bc",
@@ -154,7 +160,7 @@ const char *AMDGCN::Linker::constructLlcCommand(
llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
// Construct llc command.
ArgStringList LlcArgs{InputFileName, "-mtriple=amdgcn-amd-amdhsa",
- "-filetype=obj",
+ "-filetype=obj", "-mattr=-code-object-v3",
Args.MakeArgString("-mcpu=" + SubArchName), "-o"};
std::string LlcOutputFileName =
C.getDriver().GetTemporaryPath(OutputFilePrefix, "o");
@@ -184,6 +190,40 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
C.addCommand(llvm::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs));
}
+// Construct a clang-offload-bundler command to bundle code objects for
+// different GPUs into a HIP fat binary.
+void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
+ StringRef OutputFileName, const InputInfoList &Inputs,
+ const llvm::opt::ArgList &Args, const Tool& T) {
+ // Construct clang-offload-bundler command to bundle object files for
+ // different GPU archs.
+ ArgStringList BundlerArgs;
+ BundlerArgs.push_back(Args.MakeArgString("-type=o"));
+
+ // ToDo: Remove the dummy host binary entry (fed from NULL_FILE) which is
+ // required by clang-offload-bundler.
+ std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
+ std::string BundlerInputArg = "-inputs=" NULL_FILE;
+
+ for (const auto &II : Inputs) {
+ const auto* A = II.getAction();
+ BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" +
+ StringRef(A->getOffloadingArch()).str();
+ BundlerInputArg = BundlerInputArg + "," + II.getFilename();
+ }
+ BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
+ BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
+
+ auto BundlerOutputArg =
+ Args.MakeArgString(std::string("-outputs=").append(OutputFileName));
+ BundlerArgs.push_back(BundlerOutputArg);
+
+ SmallString<128> BundlerPath(C.getDriver().Dir);
+ llvm::sys::path::append(BundlerPath, "clang-offload-bundler");
+ const char *Bundler = Args.MakeArgString(BundlerPath);
+ C.addCommand(llvm::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));
+}
+
// For amdgcn the inputs of the linker job are device bitcode and output is
// object file. It calls llvm-link, opt, llc, then lld steps.
void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
@@ -192,6 +232,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const ArgList &Args,
const char *LinkingOutput) const {
+ if (JA.getType() == types::TY_HIP_FATBIN)
+ return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
+
assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn &&
"Unsupported target");
@@ -244,9 +287,15 @@ void HIPToolChain::addClangTargetOptions(
options::OPT_fno_cuda_approx_transcendentals, false))
CC1Args.push_back("-fcuda-approx-transcendentals");
- if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
+ if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))
- CC1Args.push_back("-fcuda-rdc");
+ CC1Args.push_back("-fgpu-rdc");
+
+ // Default to "hidden" visibility, as object level linking will not be
+ // supported for the foreseeable future.
+ if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
+ options::OPT_fvisibility_ms_compat))
+ CC1Args.append({"-fvisibility", "hidden"});
}
llvm::opt::DerivedArgList *