diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Driver/ToolChains/Cuda.cpp')
| -rw-r--r-- | contrib/llvm/tools/clang/lib/Driver/ToolChains/Cuda.cpp | 228 | 
1 files changed, 188 insertions, 40 deletions
diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Cuda.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Cuda.cpp index bc4820797b2f..d17c4c39532a 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Cuda.cpp @@ -8,18 +8,21 @@  //===----------------------------------------------------------------------===//  #include "Cuda.h" -#include "InputInfo.h"  #include "CommonArgs.h" +#include "InputInfo.h"  #include "clang/Basic/Cuda.h" -#include "clang/Config/config.h"  #include "clang/Basic/VirtualFileSystem.h" -#include "clang/Driver/Distro.h" +#include "clang/Config/config.h"  #include "clang/Driver/Compilation.h" +#include "clang/Driver/Distro.h"  #include "clang/Driver/Driver.h"  #include "clang/Driver/DriverDiagnostic.h"  #include "clang/Driver/Options.h"  #include "llvm/Option/ArgList.h" +#include "llvm/Support/FileSystem.h"  #include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h"  #include <system_error>  using namespace clang::driver; @@ -52,6 +55,10 @@ static CudaVersion ParseCudaVersionFile(llvm::StringRef V) {      return CudaVersion::CUDA_80;    if (Major == 9 && Minor == 0)      return CudaVersion::CUDA_90; +  if (Major == 9 && Minor == 1) +    return CudaVersion::CUDA_91; +  if (Major == 9 && Minor == 2) +    return CudaVersion::CUDA_92;    return CudaVersion::UNKNOWN;  } @@ -59,42 +66,75 @@ CudaInstallationDetector::CudaInstallationDetector(      const Driver &D, const llvm::Triple &HostTriple,      const llvm::opt::ArgList &Args)      : D(D) { -  SmallVector<std::string, 4> CudaPathCandidates; +  struct Candidate { +    std::string Path; +    bool StrictChecking; + +    Candidate(std::string Path, bool StrictChecking = false) +        : Path(Path), StrictChecking(StrictChecking) {} +  }; +  SmallVector<Candidate, 4> Candidates;    // In decreasing order so we prefer newer versions to older versions.    
std::initializer_list<const char *> Versions = {"8.0", "7.5", "7.0"};    if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) { -    CudaPathCandidates.push_back( -        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ)); +    Candidates.emplace_back( +        Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());    } else if (HostTriple.isOSWindows()) {      for (const char *Ver : Versions) -      CudaPathCandidates.push_back( +      Candidates.emplace_back(            D.SysRoot + "/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +            Ver);    } else { -    CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda"); +    if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) { +      // Try to find ptxas binary. If the executable is located in a directory +      // called 'bin/', its parent directory might be a good guess for a valid +      // CUDA installation. +      // However, some distributions might install 'ptxas' to /usr/bin. In that +      // case the candidate would be '/usr' which passes the following checks +      // because '/usr/include' exists as well. To avoid this case, we always +      // check for the directory potentially containing files for libdevice, +      // even if the user passes -nocudalib. 
+      if (llvm::ErrorOr<std::string> ptxas = +              llvm::sys::findProgramByName("ptxas")) { +        SmallString<256> ptxasAbsolutePath; +        llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath); + +        StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath); +        if (llvm::sys::path::filename(ptxasDir) == "bin") +          Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir), +                                  /*StrictChecking=*/true); +      } +    } + +    Candidates.emplace_back(D.SysRoot + "/usr/local/cuda");      for (const char *Ver : Versions) -      CudaPathCandidates.push_back(D.SysRoot + "/usr/local/cuda-" + Ver); +      Candidates.emplace_back(D.SysRoot + "/usr/local/cuda-" + Ver);      if (Distro(D.getVFS()).IsDebian())        // Special case for Debian to have nvidia-cuda-toolkit work        // out of the box. More info on http://bugs.debian.org/882505 -      CudaPathCandidates.push_back(D.SysRoot + "/usr/lib/cuda"); +      Candidates.emplace_back(D.SysRoot + "/usr/lib/cuda");    } -  for (const auto &CudaPath : CudaPathCandidates) { -    if (CudaPath.empty() || !D.getVFS().exists(CudaPath)) +  bool NoCudaLib = Args.hasArg(options::OPT_nocudalib); + +  for (const auto &Candidate : Candidates) { +    InstallPath = Candidate.Path; +    if (InstallPath.empty() || !D.getVFS().exists(InstallPath))        continue; -    InstallPath = CudaPath; -    BinPath = CudaPath + "/bin"; +    BinPath = InstallPath + "/bin";      IncludePath = InstallPath + "/include";      LibDevicePath = InstallPath + "/nvvm/libdevice";      auto &FS = D.getVFS();      if (!(FS.exists(IncludePath) && FS.exists(BinPath)))        continue; +    bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking); +    if (CheckLibDevice && !FS.exists(LibDevicePath)) +      continue;      // On Linux, we have both lib and lib64 directories, and we need to choose      // based on our triple.  On MacOS, we have only a lib directory. 
@@ -119,14 +159,18 @@ CudaInstallationDetector::CudaInstallationDetector(        Version = ParseCudaVersionFile((*VersionFile)->getBuffer());      } -    if (Version == CudaVersion::CUDA_90) { -      // CUDA-9 uses single libdevice file for all GPU variants. +    if (Version >= CudaVersion::CUDA_90) { +      // CUDA-9+ uses single libdevice file for all GPU variants.        std::string FilePath = LibDevicePath + "/libdevice.10.bc";        if (FS.exists(FilePath)) { -        for (const char *GpuArch : -             {"sm_20", "sm_30", "sm_32", "sm_35", "sm_50", "sm_52", "sm_53", -              "sm_60", "sm_61", "sm_62", "sm_70"}) -          LibDeviceMap[GpuArch] = FilePath; +        for (const char *GpuArchName : +             {"sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53", +              "sm_60", "sm_61", "sm_62", "sm_70", "sm_72"}) { +          const CudaArch GpuArch = StringToCudaArch(GpuArchName); +          if (Version >= MinVersionForCudaArch(GpuArch) && +              Version <= MaxVersionForCudaArch(GpuArch)) +            LibDeviceMap[GpuArchName] = FilePath; +        }        }      } else {        std::error_code EC; @@ -142,7 +186,7 @@ CudaInstallationDetector::CudaInstallationDetector(          StringRef GpuArch = FileName.slice(              LibDeviceName.size(), FileName.find('.', LibDeviceName.size()));          LibDeviceMap[GpuArch] = FilePath.str(); -        // Insert map entries for specifc devices with this compute +        // Insert map entries for specific devices with this compute          // capability. NVCC's choice of the libdevice library version is          // rather peculiar and depends on the CUDA version.          if (GpuArch == "compute_20") { @@ -174,7 +218,7 @@ CudaInstallationDetector::CudaInstallationDetector(      // Check that we have found at least one libdevice that we can link in if      // -nocudalib hasn't been specified. 
-    if (LibDeviceMap.empty() && !Args.hasArg(options::OPT_nocudalib)) +    if (LibDeviceMap.empty() && !NoCudaLib)        continue;      IsValid = true; @@ -231,6 +275,35 @@ void CudaInstallationDetector::print(raw_ostream &OS) const {         << CudaVersionToString(Version) << "\n";  } +namespace { +  /// Debug info kind. +enum DebugInfoKind { +  NoDebug,       /// No debug info. +  LineTableOnly, /// Line tables only. +  FullDebug      /// Full debug info. +}; +} // anonymous namespace + +static DebugInfoKind mustEmitDebugInfo(const ArgList &Args) { +  Arg *A = Args.getLastArg(options::OPT_O_Group); +  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug, +                   options::OPT_no_cuda_noopt_device_debug, +                   !A || A->getOption().matches(options::OPT_O0))) { +    if (const Arg *A = Args.getLastArg(options::OPT_g_Group)) { +      const Option &Opt = A->getOption(); +      if (Opt.matches(options::OPT_gN_Group)) { +        if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0)) +          return NoDebug; +        if (Opt.matches(options::OPT_gline_tables_only) || +            Opt.matches(options::OPT_ggdb1)) +          return LineTableOnly; +      } +      return FullDebug; +    } +  } +  return NoDebug; +} +  void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,                                      const InputInfo &Output,                                      const InputInfoList &Inputs, @@ -262,8 +335,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,    ArgStringList CmdArgs;    CmdArgs.push_back(TC.getTriple().isArch64Bit() ? 
"-m64" : "-m32"); -  if (Args.hasFlag(options::OPT_cuda_noopt_device_debug, -                   options::OPT_no_cuda_noopt_device_debug, false)) { +  DebugInfoKind DIKind = mustEmitDebugInfo(Args); +  if (DIKind == FullDebug) {      // ptxas does not accept -g option if optimization is enabled, so      // we ignore the compiler's -O* options if we want debug info.      CmdArgs.push_back("-g"); @@ -299,6 +372,8 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,      // to no optimizations, but ptxas's default is -O3.      CmdArgs.push_back("-O0");    } +  if (DIKind == LineTableOnly) +    CmdArgs.push_back("-lineinfo");    // Pass -v to ptxas if it was passed to the driver.    if (Args.hasArg(options::OPT_v)) @@ -314,11 +389,17 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,    for (const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))      CmdArgs.push_back(Args.MakeArgString(A)); -  // In OpenMP we need to generate relocatable code. -  if (JA.isOffloading(Action::OFK_OpenMP) && -      Args.hasFlag(options::OPT_fopenmp_relocatable_target, -                   options::OPT_fnoopenmp_relocatable_target, -                   /*Default=*/ true)) +  bool Relocatable = false; +  if (JA.isOffloading(Action::OFK_OpenMP)) +    // In OpenMP we need to generate relocatable code. 
+    Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target, +                               options::OPT_fnoopenmp_relocatable_target, +                               /*Default=*/true); +  else if (JA.isOffloading(Action::OFK_Cuda)) +    Relocatable = Args.hasFlag(options::OPT_fcuda_rdc, +                               options::OPT_fno_cuda_rdc, /*Default=*/false); + +  if (Relocatable)      CmdArgs.push_back("-c");    const char *Exec; @@ -329,6 +410,22 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA,    C.addCommand(llvm::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));  } +static bool shouldIncludePTX(const ArgList &Args, const char *gpu_arch) { +  bool includePTX = true; +  for (Arg *A : Args) { +    if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) || +          A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ))) +      continue; +    A->claim(); +    const StringRef ArchStr = A->getValue(); +    if (ArchStr == "all" || ArchStr == gpu_arch) { +      includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ); +      continue; +    } +  } +  return includePTX; +} +  // All inputs to this linker must be from CudaDeviceActions, as we need to look  // at the Inputs' Actions in order to figure out which GPU architecture they  // correspond to. @@ -346,6 +443,8 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,    CmdArgs.push_back(TC.getTriple().isArch64Bit() ? 
"-64" : "-32");    CmdArgs.push_back(Args.MakeArgString("--create"));    CmdArgs.push_back(Args.MakeArgString(Output.getFilename())); +  if (mustEmitDebugInfo(Args) == FullDebug) +    CmdArgs.push_back("-g");    for (const auto& II : Inputs) {      auto *A = II.getAction(); @@ -356,6 +455,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,             "Device action expected to have associated a GPU architecture!");      CudaArch gpu_arch = StringToCudaArch(gpu_arch_str); +    if (II.getType() == types::TY_PP_Asm && +        !shouldIncludePTX(Args, gpu_arch_str)) +      continue;      // We need to pass an Arch of the form "sm_XX" for cubin files and      // "compute_XX" for ptx.      const char *Arch = @@ -394,7 +496,7 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,      CmdArgs.push_back(Output.getFilename());    } else      assert(Output.isNothing() && "Invalid output."); -  if (Args.hasArg(options::OPT_g_Flag)) +  if (mustEmitDebugInfo(Args) == FullDebug)      CmdArgs.push_back("-g");    if (Args.hasArg(options::OPT_v)) @@ -499,6 +601,10 @@ void CudaToolChain::addClangTargetOptions(      if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,                             options::OPT_fno_cuda_approx_transcendentals, false))        CC1Args.push_back("-fcuda-approx-transcendentals"); + +    if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc, +                           false)) +      CC1Args.push_back("-fcuda-rdc");    }    if (DriverArgs.hasArg(options::OPT_nocudalib)) @@ -518,16 +624,58 @@ void CudaToolChain::addClangTargetOptions(    CC1Args.push_back("-mlink-cuda-bitcode");    CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); -  if (CudaInstallation.version() >= CudaVersion::CUDA_90) { -    // CUDA-9 uses new instructions that are only available in PTX6.0 -    CC1Args.push_back("-target-feature"); -    CC1Args.push_back("+ptx60"); -  } else { -    // Libdevice in 
CUDA-7.0 requires PTX version that's more recent -    // than LLVM defaults to. Use PTX4.2 which is the PTX version that -    // came with CUDA-7.0. -    CC1Args.push_back("-target-feature"); -    CC1Args.push_back("+ptx42"); +  // Libdevice in CUDA-7.0 requires PTX version that's more recent than LLVM +  // defaults to. Use PTX4.2 by default, which is the PTX version that came with +  // CUDA-7.0. +  const char *PtxFeature = "+ptx42"; +  if (CudaInstallation.version() >= CudaVersion::CUDA_91) { +    // CUDA-9.1 uses new instructions that are only available in PTX6.1+ +    PtxFeature = "+ptx61"; +  } else if (CudaInstallation.version() >= CudaVersion::CUDA_90) { +    // CUDA-9.0 uses new instructions that are only available in PTX6.0+ +    PtxFeature = "+ptx60"; +  } +  CC1Args.append({"-target-feature", PtxFeature}); +  if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr, +                         options::OPT_fno_cuda_short_ptr, false)) +    CC1Args.append({"-mllvm", "--nvptx-short-ptr"}); + +  if (DeviceOffloadingKind == Action::OFK_OpenMP) { +    SmallVector<StringRef, 8> LibraryPaths; +    // Add path to lib and/or lib64 folders. +    SmallString<256> DefaultLibPath = +      llvm::sys::path::parent_path(getDriver().Dir); +    llvm::sys::path::append(DefaultLibPath, +        Twine("lib") + CLANG_LIBDIR_SUFFIX); +    LibraryPaths.emplace_back(DefaultLibPath.c_str()); + +    // Add user defined library paths from LIBRARY_PATH. 
+    llvm::Optional<std::string> LibPath = +        llvm::sys::Process::GetEnv("LIBRARY_PATH"); +    if (LibPath) { +      SmallVector<StringRef, 8> Frags; +      const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'}; +      llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr); +      for (StringRef Path : Frags) +        LibraryPaths.emplace_back(Path.trim()); +    } + +    std::string LibOmpTargetName = +      "libomptarget-nvptx-" + GpuArch.str() + ".bc"; +    bool FoundBCLibrary = false; +    for (StringRef LibraryPath : LibraryPaths) { +      SmallString<128> LibOmpTargetFile(LibraryPath); +      llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName); +      if (llvm::sys::fs::exists(LibOmpTargetFile)) { +        CC1Args.push_back("-mlink-cuda-bitcode"); +        CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile)); +        FoundBCLibrary = true; +        break; +      } +    } +    if (!FoundBCLibrary) +      getDriver().Diag(diag::warn_drv_omp_offload_target_missingbcruntime) +          << LibOmpTargetName;    }  }  | 
