summaryrefslogtreecommitdiff
path: root/lib/Driver/ToolChains/Cuda.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Driver/ToolChains/Cuda.cpp')
-rw-r--r--lib/Driver/ToolChains/Cuda.cpp228
1 files changed, 188 insertions, 40 deletions
diff --git a/lib/Driver/ToolChains/Cuda.cpp b/lib/Driver/ToolChains/Cuda.cpp
index bc4820797b2f4..d17c4c39532ad 100644
--- a/lib/Driver/ToolChains/Cuda.cpp
+++ b/lib/Driver/ToolChains/Cuda.cpp
@@ -8,18 +8,21 @@
//===----------------------------------------------------------------------===//
#include "Cuda.h"
-#include "InputInfo.h"
#include "CommonArgs.h"
+#include "InputInfo.h"
#include "clang/Basic/Cuda.h"
-#include "clang/Config/config.h"
#include "clang/Basic/VirtualFileSystem.h"
-#include "clang/Driver/Distro.h"
+#include "clang/Config/config.h"
#include "clang/Driver/Compilation.h"
+#include "clang/Driver/Distro.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "llvm/Option/ArgList.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
#include <system_error>
using namespace clang::driver;
@@ -52,6 +55,10 @@ static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
return CudaVersion::CUDA_80;
if (Major == 9 && Minor == 0)
return CudaVersion::CUDA_90;
+ if (Major == 9 && Minor == 1)
+ return CudaVersion::CUDA_91;
+ if (Major == 9 && Minor == 2)
+ return CudaVersion::CUDA_92;
return CudaVersion::UNKNOWN;
}
@@ -59,42 +66,75 @@ CudaInstallationDetector::CudaInstallationDetector(
const Driver &D, const llvm::Triple &HostTriple,
const llvm::opt::ArgList &Args)
: D(D) {
- SmallVector<std::string, 4> CudaPathCandidates;
+ struct Candidate {
+ std::string Path;
+ bool StrictChecking;
+
+ Candidate(std::string Path, bool StrictChecking = false)
+ : Path(Path), StrictChecking(StrictChecking) {}
+ };
+ SmallVector<Candidate, 4> Candidates;
// In decreasing order so we prefer newer versions to older versions.
std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};
if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
- CudaPathCandidates.push_back(
- Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ));
+ Candidates.emplace_back(
+ Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
} else if (HostTriple.isOSWindows()) {
for (const char *Ver : Versions)
- CudaPathCandidates.push_back(
+ Candidates.emplace_back(
D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
Ver);
} else {
- CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda");
+ if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
+ // Try to find ptxas binary. If the executable is located in a directory
+ // called 'bin/', its parent directory might be a good guess for a valid
+ // CUDA installation.
+ // However, some distributions might installs 'ptxas' to /usr/bin. In that
+ // case the candidate would be '/usr' which passes the following checks
+ // because '/usr/include' exists as well. To avoid this case, we always
+ // check for the directory potentially containing files for libdevice,
+ // even if the user passes -nocudalib.
+ if (llvm::ErrorOr<std::string> ptxas =
+ llvm::sys::findProgramByName("ptxas")) {
+ SmallString<256> ptxasAbsolutePath;
+ llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
+
+ StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
+ if (llvm::sys::path::filename(ptxasDir) == "bin")
+ Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
+ /*StrictChecking=*/true);
+ }
+ }
+
+ Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");
for (const char *Ver : Versions)
- CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver);
+ Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
if (Distro(D.getVFS()).IsDebian())
// Special case for Debian to have nvidia-cuda-toolkit work
// out of the box. More info on http://bugs.debian.org/882505
- CudaPathCandidates.push_back(D.SysRoot + "/usr/lib/cuda");
+ Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
}
- for (const auto &CudaPath : CudaPathCandidates) {
- if (CudaPath.empty() || !D.getVFS().exists(CudaPath))
+ bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
+
+ for (const auto &Candidate : Candidates) {
+ InstallPath = Candidate.Path;
+ if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
continue;
- InstallPath = CudaPath;
- BinPath = CudaPath + "/bin";
+ BinPath = InstallPath + "/bin";
IncludePath = InstallPath + "/include";
LibDevicePath = InstallPath + "/nvvm/libdevice";
auto &FS = D.getVFS();
if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
continue;
+ bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
+ if (CheckLibDevice && !FS.exists(LibDevicePath))
+ continue;
// On Linux, we have both lib and lib64 directories, and we need to choose
// based on our triple. On MacOS, we have only a lib directory.
@@ -119,14 +159,18 @@ CudaInstallationDetector::CudaInstallationDetector(
Version = ParseCudaVersionFile((*VersionFile)->getBuffer());
}
- if (Version == CudaVersion::CUDA_90) {
- // CUDA-9 uses single libdevice file for all GPU variants.
+ if (Version >= CudaVersion::CUDA_90) {
+ // CUDA-9+ uses single libdevice file for all GPU variants.
std::string FilePath = LibDevicePath + "/libdevice.10.bc";
if (FS.exists(FilePath)) {
- for (const char *GpuArch :
- {"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53",
- "sm_60", "sm_61", "sm_62", "sm_70"})
- LibDeviceMap[GpuArch] = FilePath;
+ for (const char *GpuArchName :
+ {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
+ "sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) {
+ const CudaArch GpuArch = StringToCudaArch(GpuArchName);
+ if (Version >= MinVersionForCudaArch(GpuArch) &&
+ Version <= MaxVersionForCudaArch(GpuArch))
+ LibDeviceMap[GpuArchName] = FilePath;
+ }
}
} else {
std::error_code EC;
@@ -142,7 +186,7 @@ CudaInstallationDetector::CudaInstallationDetector(
StringRef GpuArch = FileName.slice(
LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));
LibDeviceMap[GpuArch] = FilePath.str();
- // Insert map entries for specifc devices with this compute
+ // Insert map entries for specific devices with this compute
// capability. NVCC's choice of the libdevice library version is
// rather peculiar and depends on the CUDA version.
if (GpuArch == "compute_20") {
@@ -174,7 +218,7 @@ CudaInstallationDetector::CudaInstallationDetector(
// Check that we have found at least one libdevice that we can link in if
// -nocudalib hasn't been specified.
- if (LibDeviceMap.empty() && !Args.hasArg(options::OPT_nocudalib))
+ if (LibDeviceMap.empty() && !NoCudaLib)
continue;
IsValid = true;
@@ -231,6 +275,35 @@ void CudaInstallationDetector::print(raw_ostream &OS) const {
<< CudaVersionToString(Version) << "\n";
}
+namespace {
+ /// Debug info kind.
+enum DebugInfoKind {
+ NoDebug, /// No debug info.
+ LineTableOnly, /// Line tables only.
+ FullDebug /// Full debug info.
+};
+} // anonymous namespace
+
+static DebugInfoKind mustEmitDebugInfo(const ArgList &Args) {
+ Arg *A = Args.getLastArg(options::OPT_O_Group);
+ if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
+ options::OPT_no_cuda_noopt_device_debug,
+ !A || A->getOption().matches(options::OPT_O0))) {
+ if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
+ const Option &Opt = A->getOption();
+ if (Opt.matches(options::OPT_gN_Group)) {
+ if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
+ return NoDebug;
+ if (Opt.matches(options::OPT_gline_tables_only) ||
+ Opt.matches(options::OPT_ggdb1))
+ return LineTableOnly;
+ }
+ return FullDebug;
+ }
+ }
+ return NoDebug;
+}
+
void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
@@ -262,8 +335,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
ArgStringList CmdArgs;
CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
- if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
- options::OPT_no_cuda_noopt_device_debug, false)) {
+ DebugInfoKind DIKind = mustEmitDebugInfo(Args);
+ if (DIKind == FullDebug) {
// ptxas does not accept -g option if optimization is enabled, so
// we ignore the compiler's -O* options if we want debug info.
CmdArgs.push_back("-g");
@@ -299,6 +372,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
// to no optimizations, but ptxas's default is -O3.
CmdArgs.push_back("-O0");
}
+ if (DIKind == LineTableOnly)
+ CmdArgs.push_back("-lineinfo");
// Pass -v to ptxas if it was passed to the driver.
if (Args.hasArg(options::OPT_v))
@@ -314,11 +389,17 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
CmdArgs.push_back(Args.MakeArgString(A));
- // In OpenMP we need to generate relocatable code.
- if (JA.isOffloading(Action::OFK_OpenMP) &&
- Args.hasFlag(options::OPT_fopenmp_relocatable_target,
- options::OPT_fnoopenmp_relocatable_target,
- /*Default=*/ true))
+ bool Relocatable = false;
+ if (JA.isOffloading(Action::OFK_OpenMP))
+ // In OpenMP we need to generate relocatable code.
+ Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
+ options::OPT_fnoopenmp_relocatable_target,
+ /*Default=*/true);
+ else if (JA.isOffloading(Action::OFK_Cuda))
+ Relocatable = Args.hasFlag(options::OPT_fcuda_rdc,
+ options::OPT_fno_cuda_rdc, /*Default=*/false);
+
+ if (Relocatable)
CmdArgs.push_back("-c");
const char *Exec;
@@ -329,6 +410,22 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
}
+static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) {
+ bool includePTX = true;
+ for (Arg *A : Args) {
+ if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
+ A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
+ continue;
+ A->claim();
+ const StringRef ArchStr = A->getValue();
+ if (ArchStr == "all" || ArchStr == gpu_arch) {
+ includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
+ continue;
+ }
+ }
+ return includePTX;
+}
+
// All inputs to this linker must be from CudaDeviceActions, as we need to look
// at the Inputs' Actions in order to figure out which GPU architecture they
// correspond to.
@@ -346,6 +443,8 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
CmdArgs.push_back(Args.MakeArgString("--create"));
CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
+ if (mustEmitDebugInfo(Args) == FullDebug)
+ CmdArgs.push_back("-g");
for (const auto& II : Inputs) {
auto *A = II.getAction();
@@ -356,6 +455,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
"Device action expected to have associated a GPU architecture!");
CudaArch gpu_arch = StringToCudaArch(gpu_arch_str);
+ if (II.getType() == types::TY_PP_Asm &&
+ !shouldIncludePTX(Args, gpu_arch_str))
+ continue;
// We need to pass an Arch of the form "sm_XX" for cubin files and
// "compute_XX" for ptx.
const char *Arch =
@@ -394,7 +496,7 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(Output.getFilename());
} else
assert(Output.isNothing() && "Invalid output.");
- if (Args.hasArg(options::OPT_g_Flag))
+ if (mustEmitDebugInfo(Args) == FullDebug)
CmdArgs.push_back("-g");
if (Args.hasArg(options::OPT_v))
@@ -499,6 +601,10 @@ void CudaToolChain::addClangTargetOptions(
if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
options::OPT_fno_cuda_approx_transcendentals, false))
CC1Args.push_back("-fcuda-approx-transcendentals");
+
+ if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
+ false))
+ CC1Args.push_back("-fcuda-rdc");
}
if (DriverArgs.hasArg(options::OPT_nocudalib))
@@ -518,16 +624,58 @@ void CudaToolChain::addClangTargetOptions(
CC1Args.push_back("-mlink-cuda-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
- if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
- // CUDA-9 uses new instructions that are only available in PTX6.0
- CC1Args.push_back("-target-feature");
- CC1Args.push_back("+ptx60");
- } else {
- // Libdevice in CUDA-7.0 requires PTX version that's more recent
- // than LLVM defaults to. Use PTX4.2 which is the PTX version that
- // came with CUDA-7.0.
- CC1Args.push_back("-target-feature");
- CC1Args.push_back("+ptx42");
+ // Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM
+ // defaults to. Use PTX4.2 by default, which is the PTX version that came with
+ // CUDA-7.0.
+ const char *PtxFeature = "+ptx42";
+ if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
+ // CUDA-9.1 uses new instructions that are only available in PTX6.1+
+ PtxFeature = "+ptx61";
+ } else if (CudaInstallation.version() >= CudaVersion::CUDA_90) {
+ // CUDA-9.0 uses new instructions that are only available in PTX6.0+
+ PtxFeature = "+ptx60";
+ }
+ CC1Args.append({"-target-feature", PtxFeature});
+ if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
+ options::OPT_fno_cuda_short_ptr, false))
+ CC1Args.append({"-mllvm", "--nvptx-short-ptr"});
+
+ if (DeviceOffloadingKind == Action::OFK_OpenMP) {
+ SmallVector<StringRef, 8> LibraryPaths;
+ // Add path to lib and/or lib64 folders.
+ SmallString<256> DefaultLibPath =
+ llvm::sys::path::parent_path(getDriver().Dir);
+ llvm::sys::path::append(DefaultLibPath,
+ Twine("lib") + CLANG_LIBDIR_SUFFIX);
+ LibraryPaths.emplace_back(DefaultLibPath.c_str());
+
+ // Add user defined library paths from LIBRARY_PATH.
+ llvm::Optional<std::string> LibPath =
+ llvm::sys::Process::GetEnv("LIBRARY_PATH");
+ if (LibPath) {
+ SmallVector<StringRef, 8> Frags;
+ const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
+ llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
+ for (StringRef Path : Frags)
+ LibraryPaths.emplace_back(Path.trim());
+ }
+
+ std::string LibOmpTargetName =
+ "libomptarget-nvptx-" + GpuArch.str() + ".bc";
+ bool FoundBCLibrary = false;
+ for (StringRef LibraryPath : LibraryPaths) {
+ SmallString<128> LibOmpTargetFile(LibraryPath);
+ llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
+ if (llvm::sys::fs::exists(LibOmpTargetFile)) {
+ CC1Args.push_back("-mlink-cuda-bitcode");
+ CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
+ FoundBCLibrary = true;
+ break;
+ }
+ }
+ if (!FoundBCLibrary)
+ getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
+ << LibOmpTargetName;
}
}