summaryrefslogtreecommitdiff
path: root/lib/Driver/ToolChains/Cuda.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Driver/ToolChains/Cuda.cpp')
-rw-r--r--lib/Driver/ToolChains/Cuda.cpp122
1 files changed, 81 insertions, 41 deletions
diff --git a/lib/Driver/ToolChains/Cuda.cpp b/lib/Driver/ToolChains/Cuda.cpp
index 7fb4ae4ea9cf6..57b8d4340e3b3 100644
--- a/lib/Driver/ToolChains/Cuda.cpp
+++ b/lib/Driver/ToolChains/Cuda.cpp
@@ -11,7 +11,6 @@
#include "CommonArgs.h"
#include "InputInfo.h"
#include "clang/Basic/Cuda.h"
-#include "clang/Basic/VirtualFileSystem.h"
#include "clang/Config/config.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Distro.h"
@@ -23,6 +22,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include <system_error>
using namespace clang::driver;
@@ -59,6 +59,8 @@ static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
return CudaVersion::CUDA_91;
if (Major == 9 && Minor == 2)
return CudaVersion::CUDA_92;
+ if (Major == 10 && Minor == 0)
+ return CudaVersion::CUDA_100;
return CudaVersion::UNKNOWN;
}
@@ -112,7 +114,7 @@ CudaInstallationDetector::CudaInstallationDetector(
for (const char *Ver : Versions)
Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
- if (Distro(D.getVFS()).IsDebian())
+ if (Distro(D.getVFS()).IsDebian() || Distro(D.getVFS()).IsUbuntu())
// Special case for Debian to have nvidia-cuda-toolkit work
// out of the box. More info on http://bugs.debian.org/882505
Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
@@ -165,7 +167,7 @@ CudaInstallationDetector::CudaInstallationDetector(
if (FS.exists(FilePath)) {
for (const char *GpuArchName :
{"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
- "sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) {
+ "sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) {
const CudaArch GpuArch = StringToCudaArch(GpuArchName);
if (Version >= MinVersionForCudaArch(GpuArch) &&
Version <= MaxVersionForCudaArch(GpuArch))
@@ -276,32 +278,44 @@ void CudaInstallationDetector::print(raw_ostream &OS) const {
}
namespace {
- /// Debug info kind.
-enum DebugInfoKind {
- NoDebug, /// No debug info.
- LineTableOnly, /// Line tables only.
- FullDebug /// Full debug info.
+/// Debug info level for the NVPTX devices. We may need to emit different debug
+/// info level for the host and for the device itselfi. This type controls
+/// emission of the debug info for the devices. It either prohibits disable info
+/// emission completely, or emits debug directives only, or emits same debug
+/// info as for the host.
+enum DeviceDebugInfoLevel {
+ DisableDebugInfo, /// Do not emit debug info for the devices.
+ DebugDirectivesOnly, /// Emit only debug directives.
+ EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
+ /// host.
};
} // anonymous namespace
-static DebugInfoKind mustEmitDebugInfo(const ArgList &Args) {
- Arg *A = Args.getLastArg(options::OPT_O_Group);
- if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
- options::OPT_no_cuda_noopt_device_debug,
- !A || A->getOption().matches(options::OPT_O0))) {
- if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
- const Option &Opt = A->getOption();
- if (Opt.matches(options::OPT_gN_Group)) {
- if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
- return NoDebug;
- if (Opt.matches(options::OPT_gline_tables_only) ||
- Opt.matches(options::OPT_ggdb1))
- return LineTableOnly;
- }
- return FullDebug;
+/// Define debug info level for the NVPTX devices. If the debug info for both
+/// the host and device are disabled (-g0/-ggdb0 or no debug options at all). If
+/// only debug directives are requested for the both host and device
+/// (-gline-directvies-only), or the debug info only for the device is disabled
+/// (optimization is on and --cuda-noopt-device-debug was not specified), the
+/// debug directves only must be emitted for the device. Otherwise, use the same
+/// debug info level just like for the host (with the limitations of only
+/// supported DWARF2 standard).
+static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
+ const Arg *A = Args.getLastArg(options::OPT_O_Group);
+ bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
+ Args.hasFlag(options::OPT_cuda_noopt_device_debug,
+ options::OPT_no_cuda_noopt_device_debug,
+ /*Default=*/false);
+ if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
+ const Option &Opt = A->getOption();
+ if (Opt.matches(options::OPT_gN_Group)) {
+ if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
+ return DisableDebugInfo;
+ if (Opt.matches(options::OPT_gline_directives_only))
+ return DebugDirectivesOnly;
}
+ return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
}
- return NoDebug;
+ return DisableDebugInfo;
}
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
@@ -335,8 +349,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
ArgStringList CmdArgs;
CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
- DebugInfoKind DIKind = mustEmitDebugInfo(Args);
- if (DIKind == FullDebug) {
+ DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
+ if (DIKind == EmitSameDebugInfoAsHost) {
// ptxas does not accept -g option if optimization is enabled, so
// we ignore the compiler's -O* options if we want debug info.
CmdArgs.push_back("-g");
@@ -372,7 +386,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
// to no optimizations, but ptxas's default is -O3.
CmdArgs.push_back("-O0");
}
- if (DIKind == LineTableOnly)
+ if (DIKind == DebugDirectivesOnly)
CmdArgs.push_back("-lineinfo");
// Pass -v to ptxas if it was passed to the driver.
@@ -396,8 +410,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
options::OPT_fnoopenmp_relocatable_target,
/*Default=*/true);
else if (JA.isOffloading(Action::OFK_Cuda))
- Relocatable = Args.hasFlag(options::OPT_fcuda_rdc,
- options::OPT_fno_cuda_rdc, /*Default=*/false);
+ Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
+ options::OPT_fno_gpu_rdc, /*Default=*/false);
if (Relocatable)
CmdArgs.push_back("-c");
@@ -443,7 +457,7 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
CmdArgs.push_back(Args.MakeArgString("--create"));
CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
- if (mustEmitDebugInfo(Args) == FullDebug)
+ if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
CmdArgs.push_back("-g");
for (const auto& II : Inputs) {
@@ -496,7 +510,7 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Output.getFilename());
} else
assert(Output.isNothing() && "Invalid output.");
- if (mustEmitDebugInfo(Args) == FullDebug)
+ if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
CmdArgs.push_back("-g");
if (Args.hasArg(options::OPT_v))
@@ -509,6 +523,11 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-arch");
CmdArgs.push_back(Args.MakeArgString(GPUArch));
+ // Assume that the directory specified with --libomptarget_nvptx_path
+ // contains the static library libomptarget-nvptx.a.
+ if (const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
+ CmdArgs.push_back(Args.MakeArgString(Twine("-L") + A->getValue()));
+
// Add paths specified in LIBRARY_PATH environment variable as -L options.
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
@@ -602,9 +621,9 @@ void CudaToolChain::addClangTargetOptions(
options::OPT_fno_cuda_approx_transcendentals, false))
CC1Args.push_back("-fcuda-approx-transcendentals");
- if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
+ if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))
- CC1Args.push_back("-fcuda-rdc");
+ CC1Args.push_back("-fgpu-rdc");
}
if (DriverArgs.hasArg(options::OPT_nocudalib))
@@ -621,13 +640,16 @@ void CudaToolChain::addClangTargetOptions(
return;
}
- CC1Args.push_back("-mlink-cuda-bitcode");
+ CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
// Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM
// defaults to. Use PTX4.2 by default, which is the PTX version that came with
// CUDA-7.0.
const char *PtxFeature = "+ptx42";
+ // TODO(tra): CUDA-10+ needs PTX 6.3 to support new features. However that
+ // requires fair amount of work on LLVM side. We'll keep using PTX 6.1 until
+ // all prerequisites are in place.
if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
// CUDA-9.1 uses new instructions that are only available in PTX6.1+
PtxFeature = "+ptx61";
@@ -642,12 +664,9 @@ void CudaToolChain::addClangTargetOptions(
if (DeviceOffloadingKind == Action::OFK_OpenMP) {
SmallVector<StringRef, 8> LibraryPaths;
- // Add path to lib and/or lib64 folders.
- SmallString<256> DefaultLibPath =
- llvm::sys::path::parent_path(getDriver().Dir);
- llvm::sys::path::append(DefaultLibPath,
- Twine("lib") + CLANG_LIBDIR_SUFFIX);
- LibraryPaths.emplace_back(DefaultLibPath.c_str());
+
+ if (const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
+ LibraryPaths.push_back(A->getValue());
// Add user defined library paths from LIBRARY_PATH.
llvm::Optional<std::string> LibPath =
@@ -660,6 +679,12 @@ void CudaToolChain::addClangTargetOptions(
LibraryPaths.emplace_back(Path.trim());
}
+ // Add path to lib / lib64 folder.
+ SmallString<256> DefaultLibPath =
+ llvm::sys::path::parent_path(getDriver().Dir);
+ llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
+ LibraryPaths.emplace_back(DefaultLibPath.c_str());
+
std::string LibOmpTargetName =
"libomptarget-nvptx-" + GpuArch.str() + ".bc";
bool FoundBCLibrary = false;
@@ -667,7 +692,7 @@ void CudaToolChain::addClangTargetOptions(
SmallString<128> LibOmpTargetFile(LibraryPath);
llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
if (llvm::sys::fs::exists(LibOmpTargetFile)) {
- CC1Args.push_back("-mlink-cuda-bitcode");
+ CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
FoundBCLibrary = true;
break;
@@ -691,6 +716,21 @@ bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
O.matches(options::OPT_gcolumn_info);
}
+void CudaToolChain::adjustDebugInfoKind(
+ codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
+ switch (mustEmitDebugInfo(Args)) {
+ case DisableDebugInfo:
+ DebugInfoKind = codegenoptions::NoDebugInfo;
+ break;
+ case DebugDirectivesOnly:
+ DebugInfoKind = codegenoptions::DebugDirectivesOnly;
+ break;
+ case EmitSameDebugInfoAsHost:
+ // Use same debug info level as the host.
+ break;
+ }
+}
+
void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
// Check our CUDA version if we're going to include the CUDA headers.