1 files changed, 81 insertions, 41 deletions
diff --git a/lib/Driver/ToolChains/Cuda.cpp b/lib/Driver/ToolChains/Cuda.cpp
index 7fb4ae4ea9cf6..57b8d4340e3b3 100644
--- a/lib/Driver/ToolChains/Cuda.cpp
+++ b/lib/Driver/ToolChains/Cuda.cpp
@@ -11,7 +11,6 @@
 #include "CommonArgs.h"
 #include "InputInfo.h"
 #include "clang/Basic/Cuda.h"
-#include "clang/Basic/VirtualFileSystem.h"
 #include "clang/Config/config.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Distro.h"
@@ -23,6 +22,7 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/VirtualFileSystem.h"
 #include <system_error>
 
 using namespace clang::driver;
@@ -59,6 +59,8 @@ static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {
     return CudaVersion::CUDA_91;
   if (Major == 9 && Minor == 2)
     return CudaVersion::CUDA_92;
+  if (Major == 10 && Minor == 0)
+    return CudaVersion::CUDA_100;
   return CudaVersion::UNKNOWN;
 }
 
@@ -112,7 +114,7 @@ CudaInstallationDetector::CudaInstallationDetector(
     for (const char *Ver : Versions)
       Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);
 
-    if (Distro(D.getVFS()).IsDebian())
+    if (Distro(D.getVFS()).IsDebian() || Distro(D.getVFS()).IsUbuntu())
       // Special case for Debian to have nvidia-cuda-toolkit work
       // out of the box. More info on http://bugs.debian.org/882505
       Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");
@@ -165,7 +167,7 @@ CudaInstallationDetector::CudaInstallationDetector(
       if (FS.exists(FilePath)) {
         for (const char *GpuArchName :
              {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
-              "sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) {
+              "sm_60", "sm_61", "sm_62", "sm_70", "sm_72", "sm_75"}) {
           const CudaArch GpuArch = StringToCudaArch(GpuArchName);
           if (Version >= MinVersionForCudaArch(GpuArch) &&
               Version <= MaxVersionForCudaArch(GpuArch))
@@ -276,32 +278,44 @@ void CudaInstallationDetector::print(raw_ostream &OS) const {
 }
 
 namespace {
-  /// Debug info kind.
-enum DebugInfoKind {
-  NoDebug,       /// No debug info.
-  LineTableOnly, /// Line tables only.
-  FullDebug      /// Full debug info.
+/// Debug info level for the NVPTX devices. We may need to emit different debug
+/// info levels for the host and for the device itself. This type controls
+/// emission of the debug info for the devices. It either disables debug info
+/// emission completely, or emits debug directives only, or emits the same debug
+/// info as for the host.
+enum DeviceDebugInfoLevel {
+  DisableDebugInfo,        /// Do not emit debug info for the devices.
+  DebugDirectivesOnly,     /// Emit only debug directives.
+  EmitSameDebugInfoAsHost, /// Use the same debug info level just like for the
+                           /// host.
 };
 } // anonymous namespace
 
-static DebugInfoKind mustEmitDebugInfo(const ArgList &Args) {
-  Arg *A = Args.getLastArg(options::OPT_O_Group);
-  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
-                   options::OPT_no_cuda_noopt_device_debug,
-                   !A || A->getOption().matches(options::OPT_O0))) {
-    if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
-      const Option &Opt = A->getOption();
-      if (Opt.matches(options::OPT_gN_Group)) {
-        if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
-          return NoDebug;
-        if (Opt.matches(options::OPT_gline_tables_only) ||
-            Opt.matches(options::OPT_ggdb1))
-          return LineTableOnly;
-      }
-      return FullDebug;
+/// Define debug info level for the NVPTX devices. If the debug info for both
+/// the host and device is disabled (-g0/-ggdb0 or no debug options at all),
+/// no debug info is emitted for the device. If only debug directives are
+/// requested for both the host and device (-gline-directives-only), or the
+/// debug info is disabled only for the device (optimization is on and
+/// --cuda-noopt-device-debug was not specified), only debug directives must be
+/// emitted for the device. Otherwise, use the same debug info level as for the
+/// host (with the limitation that only the DWARF2 standard is supported).
+static DeviceDebugInfoLevel mustEmitDebugInfo(const ArgList &Args) {
+  const Arg *A = Args.getLastArg(options::OPT_O_Group);
+  bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
+                        Args.hasFlag(options::OPT_cuda_noopt_device_debug,
+                                     options::OPT_no_cuda_noopt_device_debug,
+                                     /*Default=*/false);
+  if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
+    const Option &Opt = A->getOption();
+    if (Opt.matches(options::OPT_gN_Group)) {
+      if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
+        return DisableDebugInfo;
+      if (Opt.matches(options::OPT_gline_directives_only))
+        return DebugDirectivesOnly;
     }
+    return IsDebugEnabled ? EmitSameDebugInfoAsHost : DebugDirectivesOnly;
   }
-  return NoDebug;
+  return DisableDebugInfo;
 }
 
 void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
@@ -335,8 +349,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
 
   ArgStringList CmdArgs;
   CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-m64" : "-m32");
-  DebugInfoKind DIKind = mustEmitDebugInfo(Args);
-  if (DIKind == FullDebug) {
+  DeviceDebugInfoLevel DIKind = mustEmitDebugInfo(Args);
+  if (DIKind == EmitSameDebugInfoAsHost) {
     // ptxas does not accept -g option if optimization is enabled, so
     // we ignore the compiler's -O* options if we want debug info.
     CmdArgs.push_back("-g");
@@ -372,7 +386,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
     // to no optimizations, but ptxas's default is -O3.
     CmdArgs.push_back("-O0");
   }
-  if (DIKind == LineTableOnly)
+  if (DIKind == DebugDirectivesOnly)
     CmdArgs.push_back("-lineinfo");
 
   // Pass -v to ptxas if it was passed to the driver.
@@ -396,8 +410,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
                                options::OPT_fnoopenmp_relocatable_target,
                                /*Default=*/true);
   else if (JA.isOffloading(Action::OFK_Cuda))
-    Relocatable = Args.hasFlag(options::OPT_fcuda_rdc,
-                               options::OPT_fno_cuda_rdc, /*Default=*/false);
+    Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
+                               options::OPT_fno_gpu_rdc, /*Default=*/false);
 
   if (Relocatable)
     CmdArgs.push_back("-c");
@@ -443,7 +457,7 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   CmdArgs.push_back(TC.getTriple().isArch64Bit() ? "-64" : "-32");
   CmdArgs.push_back(Args.MakeArgString("--create"));
   CmdArgs.push_back(Args.MakeArgString(Output.getFilename()));
-  if (mustEmitDebugInfo(Args) == FullDebug)
+  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
     CmdArgs.push_back("-g");
 
   for (const auto& II : Inputs) {
@@ -496,7 +510,7 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back(Output.getFilename());
   } else
     assert(Output.isNothing() && "Invalid output.");
-  if (mustEmitDebugInfo(Args) == FullDebug)
+  if (mustEmitDebugInfo(Args) == EmitSameDebugInfoAsHost)
     CmdArgs.push_back("-g");
 
   if (Args.hasArg(options::OPT_v))
@@ -509,6 +523,11 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
   CmdArgs.push_back("-arch");
   CmdArgs.push_back(Args.MakeArgString(GPUArch));
 
+  // Assume that the directory specified with --libomptarget_nvptx_path
+  // contains the static library libomptarget-nvptx.a.
+  if (const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
+    CmdArgs.push_back(Args.MakeArgString(Twine("-L") + A->getValue()));
+
   // Add paths specified in LIBRARY_PATH environment variable as -L options.
   addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
 
@@ -602,9 +621,9 @@ void CudaToolChain::addClangTargetOptions(
                            options::OPT_fno_cuda_approx_transcendentals, false))
       CC1Args.push_back("-fcuda-approx-transcendentals");
 
-    if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
+    if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
                            false))
-      CC1Args.push_back("-fcuda-rdc");
+      CC1Args.push_back("-fgpu-rdc");
   }
 
   if (DriverArgs.hasArg(options::OPT_nocudalib))
@@ -621,13 +640,16 @@ void CudaToolChain::addClangTargetOptions(
     return;
   }
 
-  CC1Args.push_back("-mlink-cuda-bitcode");
+  CC1Args.push_back("-mlink-builtin-bitcode");
   CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
 
   // Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM
   // defaults to. Use PTX4.2 by default, which is the PTX version that came with
   // CUDA-7.0.
   const char *PtxFeature = "+ptx42";
+  // TODO(tra): CUDA-10+ needs PTX 6.3 to support new features. However, that
+  // requires a fair amount of work on the LLVM side. We'll keep using PTX 6.1
+  // until all prerequisites are in place.
   if (CudaInstallation.version() >= CudaVersion::CUDA_91) {
     // CUDA-9.1 uses new instructions that are only available in PTX6.1+
     PtxFeature = "+ptx61";
@@ -642,12 +664,9 @@ void CudaToolChain::addClangTargetOptions(
 
   if (DeviceOffloadingKind == Action::OFK_OpenMP) {
     SmallVector<StringRef, 8> LibraryPaths;
-    // Add path to lib and/or lib64 folders.
-    SmallString<256> DefaultLibPath =
-      llvm::sys::path::parent_path(getDriver().Dir);
-    llvm::sys::path::append(DefaultLibPath,
-        Twine("lib") + CLANG_LIBDIR_SUFFIX);
-    LibraryPaths.emplace_back(DefaultLibPath.c_str());
+
+    if (const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
+      LibraryPaths.push_back(A->getValue());
 
     // Add user defined library paths from LIBRARY_PATH.
     llvm::Optional<std::string> LibPath =
@@ -660,6 +679,12 @@ void CudaToolChain::addClangTargetOptions(
         LibraryPaths.emplace_back(Path.trim());
     }
 
+    // Add path to lib / lib64 folder.
+    SmallString<256> DefaultLibPath =
+        llvm::sys::path::parent_path(getDriver().Dir);
+    llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
+    LibraryPaths.emplace_back(DefaultLibPath.c_str());
+
     std::string LibOmpTargetName =
       "libomptarget-nvptx-" + GpuArch.str() + ".bc";
     bool FoundBCLibrary = false;
@@ -667,7 +692,7 @@ void CudaToolChain::addClangTargetOptions(
       SmallString<128> LibOmpTargetFile(LibraryPath);
       llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
       if (llvm::sys::fs::exists(LibOmpTargetFile)) {
-        CC1Args.push_back("-mlink-cuda-bitcode");
+        CC1Args.push_back("-mlink-builtin-bitcode");
         CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
         FoundBCLibrary = true;
         break;
@@ -691,6 +716,21 @@ bool CudaToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
          O.matches(options::OPT_gcolumn_info);
 }
 
+void CudaToolChain::adjustDebugInfoKind(
+    codegenoptions::DebugInfoKind &DebugInfoKind, const ArgList &Args) const {
+  switch (mustEmitDebugInfo(Args)) {
+  case DisableDebugInfo:
+    DebugInfoKind = codegenoptions::NoDebugInfo;
+    break;
+  case DebugDirectivesOnly:
+    DebugInfoKind = codegenoptions::DebugDirectivesOnly;
+    break;
+  case EmitSameDebugInfoAsHost:
+    // Use same debug info level as the host.
+    break;
+  }
+}
+
 void CudaToolChain::AddCudaIncludeArgs(const ArgList &DriverArgs,
                                        ArgStringList &CC1Args) const {
   // Check our CUDA version if we're going to include the CUDA headers.