summaryrefslogtreecommitdiff
path: root/lib/Driver/ToolChains/Cuda.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Driver/ToolChains/Cuda.cpp')
-rw-r--r--lib/Driver/ToolChains/Cuda.cpp51
1 files changed, 38 insertions, 13 deletions
diff --git a/lib/Driver/ToolChains/Cuda.cpp b/lib/Driver/ToolChains/Cuda.cpp
index 42bf164f1b3fe..935a5a37ada52 100644
--- a/lib/Driver/ToolChains/Cuda.cpp
+++ b/lib/Driver/ToolChains/Cuda.cpp
@@ -338,24 +338,31 @@ CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple,
void CudaToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
- llvm::opt::ArgStringList &CC1Args) const {
- HostTC.addClangTargetOptions(DriverArgs, CC1Args);
+ llvm::opt::ArgStringList &CC1Args,
+ Action::OffloadKind DeviceOffloadingKind) const {
+ HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
- CC1Args.push_back("-fcuda-is-device");
+ StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
+ assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
+ assert((DeviceOffloadingKind == Action::OFK_OpenMP ||
+ DeviceOffloadingKind == Action::OFK_Cuda) &&
+ "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
- if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
- options::OPT_fno_cuda_flush_denormals_to_zero, false))
- CC1Args.push_back("-fcuda-flush-denormals-to-zero");
+ if (DeviceOffloadingKind == Action::OFK_Cuda) {
+ CC1Args.push_back("-fcuda-is-device");
- if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
- options::OPT_fno_cuda_approx_transcendentals, false))
- CC1Args.push_back("-fcuda-approx-transcendentals");
+ if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
+ options::OPT_fno_cuda_flush_denormals_to_zero, false))
+ CC1Args.push_back("-fcuda-flush-denormals-to-zero");
- if (DriverArgs.hasArg(options::OPT_nocudalib))
- return;
+ if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
+ options::OPT_fno_cuda_approx_transcendentals, false))
+ CC1Args.push_back("-fcuda-approx-transcendentals");
+
+ if (DriverArgs.hasArg(options::OPT_nocudalib))
+ return;
+ }
- StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
- assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch);
if (LibDeviceFile.empty()) {
@@ -396,6 +403,24 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
const OptTable &Opts = getDriver().getOpts();
+ // For OpenMP device offloading, append derived arguments. Make sure
+ // flags are not duplicated.
+ // TODO: Append the compute capability.
+ if (DeviceOffloadKind == Action::OFK_OpenMP) {
+ for (Arg *A : Args){
+ bool IsDuplicate = false;
+ for (Arg *DALArg : *DAL){
+ if (A == DALArg) {
+ IsDuplicate = true;
+ break;
+ }
+ }
+ if (!IsDuplicate)
+ DAL->append(A);
+ }
+ return DAL;
+ }
+
for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_Xarch__)) {
// Skip this argument unless the architecture matches BoundArch