diff options
Diffstat (limited to 'lib/Driver/Driver.cpp')
| -rw-r--r-- | lib/Driver/Driver.cpp | 1921 | 
1 files changed, 1496 insertions, 425 deletions
diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp index 02f4a99977110..7bd43ac9da2f6 100644 --- a/lib/Driver/Driver.cpp +++ b/lib/Driver/Driver.cpp @@ -32,7 +32,6 @@  #include "llvm/Option/OptSpecifier.h"  #include "llvm/Option/OptTable.h"  #include "llvm/Option/Option.h" -#include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/FileSystem.h"  #include "llvm/Support/Path.h" @@ -43,6 +42,9 @@  #include <map>  #include <memory>  #include <utility> +#if LLVM_ON_UNIX +#include <unistd.h> // getpid +#endif  using namespace clang::driver;  using namespace clang; @@ -55,12 +57,12 @@ Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,        Mode(GCCMode), SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone),        LTOMode(LTOK_None), ClangExecutable(ClangExecutable),        SysRoot(DEFAULT_SYSROOT), UseStdLib(true), -      DefaultTargetTriple(DefaultTargetTriple),        DriverTitle("clang LLVM compiler"), CCPrintOptionsFilename(nullptr),        CCPrintHeadersFilename(nullptr), CCLogDiagnosticsFilename(nullptr),        CCCPrintBindings(false), CCPrintHeaders(false), CCLogDiagnostics(false), -      CCGenDiagnostics(false), CCCGenericGCCName(""), CheckInputsExist(true), -      CCCUsePCH(true), SuppressMissingInputWarning(false) { +      CCGenDiagnostics(false), DefaultTargetTriple(DefaultTargetTriple), +      CCCGenericGCCName(""), CheckInputsExist(true), CCCUsePCH(true), +      SuppressMissingInputWarning(false) {    // Provide a sane fallback if no VFS is specified.    if (!this->VFS) @@ -89,31 +91,39 @@ Driver::~Driver() {    llvm::DeleteContainerSeconds(ToolChains);  } -void Driver::ParseDriverMode(ArrayRef<const char *> Args) { -  const std::string OptName = -      getOpts().getOption(options::OPT_driver_mode).getPrefixedName(); +void Driver::ParseDriverMode(StringRef ProgramName, +                             ArrayRef<const char *> Args) { +  auto Default = ToolChain::getTargetAndModeFromProgramName(ProgramName); +  StringRef DefaultMode(Default.second); +  setDriverModeFromOption(DefaultMode);    for (const char *ArgPtr : Args) {      // Ingore nullptrs, they are response file's EOL markers      if (ArgPtr == nullptr)        continue;      const StringRef Arg = ArgPtr; -    if (!Arg.startswith(OptName)) -      continue; +    setDriverModeFromOption(Arg); +  } +} -    const StringRef Value = Arg.drop_front(OptName.size()); -    const unsigned M = llvm::StringSwitch<unsigned>(Value) -                           .Case("gcc", GCCMode) -                           .Case("g++", GXXMode) -                           .Case("cpp", CPPMode) -                           .Case("cl", CLMode) -                           .Default(~0U); +void Driver::setDriverModeFromOption(StringRef Opt) { +  const std::string OptName = +      getOpts().getOption(options::OPT_driver_mode).getPrefixedName(); +  if (!Opt.startswith(OptName)) +    return; +  StringRef Value = Opt.drop_front(OptName.size()); -    if (M != ~0U) -      Mode = static_cast<DriverMode>(M); -    else -      Diag(diag::err_drv_unsupported_option_argument) << OptName << Value; -  } +  const unsigned M = llvm::StringSwitch<unsigned>(Value) +                         .Case("gcc", GCCMode) +                         .Case("g++", GXXMode) +                         .Case("cpp", CPPMode) +                         .Case("cl", CLMode) +                         .Default(~0U); + +  if (M != ~0U) +    Mode = static_cast<DriverMode>(M); +  else +    Diag(diag::err_drv_unsupported_option_argument) << OptName << Value;  }  InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings) { @@ -170,6 +180,10 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,        (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P))) {      FinalPhase = phases::Preprocess; +    // --precompile only runs up to precompilation. +  } else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) { +    FinalPhase = phases::Precompile; +      // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler.    } else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) ||               (PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) || @@ -423,6 +437,32 @@ void Driver::setLTOMode(const llvm::opt::ArgList &Args) {    }  } +/// Compute the desired OpenMP runtime from the flags provided. +Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const { +  StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME); + +  const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ); +  if (A) +    RuntimeName = A->getValue(); + +  auto RT = llvm::StringSwitch<OpenMPRuntimeKind>(RuntimeName) +                .Case("libomp", OMPRT_OMP) +                .Case("libgomp", OMPRT_GOMP) +                .Case("libiomp5", OMPRT_IOMP5) +                .Default(OMPRT_Unknown); + +  if (RT == OMPRT_Unknown) { +    if (A) +      Diag(diag::err_drv_unsupported_option_argument) +          << A->getOption().getName() << A->getValue(); +    else +      // FIXME: We could use a nicer diagnostic here. +      Diag(diag::err_drv_unsupported_opt) << "-fopenmp"; +  } + +  return RT; +} +  void Driver::CreateOffloadingDeviceToolChains(Compilation &C,                                                InputList &Inputs) { @@ -433,14 +473,71 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,    if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {          return types::isCuda(I.first);        })) { -    const ToolChain &TC = getToolChain( -        C.getInputArgs(), -        llvm::Triple(C.getSingleOffloadToolChain<Action::OFK_Host>() -                             ->getTriple() -                             .isArch64Bit() -                         ? "nvptx64-nvidia-cuda" -                         : "nvptx-nvidia-cuda")); -    C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda); +    const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); +    const llvm::Triple &HostTriple = HostTC->getTriple(); +    llvm::Triple CudaTriple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" +                                                     : "nvptx-nvidia-cuda"); +    // Use the CUDA and host triples as the key into the ToolChains map, because +    // the device toolchain we create depends on both. +    ToolChain *&CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()]; +    if (!CudaTC) { +      CudaTC = new toolchains::CudaToolChain(*this, CudaTriple, *HostTC, +                                             C.getInputArgs()); +    } +    C.addOffloadDeviceToolChain(CudaTC, Action::OFK_Cuda); +  } + +  // +  // OpenMP +  // +  // We need to generate an OpenMP toolchain if the user specified targets with +  // the -fopenmp-targets option. +  if (Arg *OpenMPTargets = +          C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) { +    if (OpenMPTargets->getNumValues()) { +      // We expect that -fopenmp-targets is always used in conjunction with the +      // option -fopenmp specifying a valid runtime with offloading support, +      // i.e. libomp or libiomp. +      bool HasValidOpenMPRuntime = C.getInputArgs().hasFlag( +          options::OPT_fopenmp, options::OPT_fopenmp_EQ, +          options::OPT_fno_openmp, false); +      if (HasValidOpenMPRuntime) { +        OpenMPRuntimeKind OpenMPKind = getOpenMPRuntime(C.getInputArgs()); +        HasValidOpenMPRuntime = +            OpenMPKind == OMPRT_OMP || OpenMPKind == OMPRT_IOMP5; +      } + +      if (HasValidOpenMPRuntime) { +        llvm::StringMap<const char *> FoundNormalizedTriples; +        for (const char *Val : OpenMPTargets->getValues()) { +          llvm::Triple TT(Val); +          std::string NormalizedName = TT.normalize(); + +          // Make sure we don't have a duplicate triple. +          auto Duplicate = FoundNormalizedTriples.find(NormalizedName); +          if (Duplicate != FoundNormalizedTriples.end()) { +            Diag(clang::diag::warn_drv_omp_offload_target_duplicate) +                << Val << Duplicate->second; +            continue; +          } + +          // Store the current triple so that we can check for duplicates in the +          // following iterations. +          FoundNormalizedTriples[NormalizedName] = Val; + +          // If the specified target is invalid, emit a diagnostic. +          if (TT.getArch() == llvm::Triple::UnknownArch) +            Diag(clang::diag::err_drv_invalid_omp_target) << Val; +          else { +            const ToolChain &TC = getToolChain(C.getInputArgs(), TT); +            C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); +          } +        } +      } else +        Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); +    } else +      Diag(clang::diag::warn_drv_empty_joined_argument) +          << OpenMPTargets->getAsString(C.getInputArgs());    }    // @@ -456,8 +553,9 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {    // FIXME: Handle environment options which affect driver behavior, somewhere    // (client?). GCC_EXEC_PREFIX, LPATH, CC_PRINT_OPTIONS. -  if (char *env = ::getenv("COMPILER_PATH")) { -    StringRef CompilerPath = env; +  if (Optional<std::string> CompilerPathValue = +          llvm::sys::Process::GetEnv("COMPILER_PATH")) { +    StringRef CompilerPath = *CompilerPathValue;      while (!CompilerPath.empty()) {        std::pair<StringRef, StringRef> Split =            CompilerPath.split(llvm::sys::EnvPathSeparator); @@ -468,7 +566,7 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {    // We look for the driver mode option early, because the mode can affect    // how other options are parsed. -  ParseDriverMode(ArgList.slice(1)); +  ParseDriverMode(ClangExecutable, ArgList.slice(1));    // FIXME: What are we going to do with -V and -b? @@ -535,26 +633,20 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {    setLTOMode(Args); -  // Ignore -fembed-bitcode options with LTO -  // since the output will be bitcode anyway. -  if (getLTOMode() == LTOK_None) { -    if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) { -      StringRef Name = A->getValue(); -      unsigned Model = llvm::StringSwitch<unsigned>(Name) -          .Case("off", EmbedNone) -          .Case("all", EmbedBitcode) -          .Case("bitcode", EmbedBitcode) -          .Case("marker", EmbedMarker) -          .Default(~0U); -      if (Model == ~0U) { -        Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) -                                                  << Name; -      } else -        BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model); -    } -  } else { -    // claim the bitcode option under LTO so no warning is issued. -    Args.ClaimAllArgs(options::OPT_fembed_bitcode_EQ); +  // Process -fembed-bitcode= flags. +  if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) { +    StringRef Name = A->getValue(); +    unsigned Model = llvm::StringSwitch<unsigned>(Name) +        .Case("off", EmbedNone) +        .Case("all", EmbedBitcode) +        .Case("bitcode", EmbedBitcode) +        .Case("marker", EmbedMarker) +        .Default(~0U); +    if (Model == ~0U) { +      Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) +                                                << Name; +    } else +      BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);    }    std::unique_ptr<llvm::opt::InputArgList> UArgs = @@ -610,6 +702,95 @@ static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) {    OS << '\n';  } +bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename, +                                    SmallString<128> &CrashDiagDir) { +  using namespace llvm::sys; +  assert(llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin() && +         "Only knows about .crash files on Darwin"); + +  // The .crash file can be found on at ~/Library/Logs/DiagnosticReports/ +  // (or /Library/Logs/DiagnosticReports for root) and has the filename pattern +  // clang-<VERSION>_<YYYY-MM-DD-HHMMSS>_<hostname>.crash. +  path::home_directory(CrashDiagDir); +  if (CrashDiagDir.startswith("/var/root")) +    CrashDiagDir = "/"; +  path::append(CrashDiagDir, "Library/Logs/DiagnosticReports"); +  int PID = +#if LLVM_ON_UNIX +      getpid(); +#else +      0; +#endif +  std::error_code EC; +  fs::file_status FileStatus; +  TimePoint<> LastAccessTime; +  SmallString<128> CrashFilePath; +  // Lookup the .crash files and get the one generated by a subprocess spawned +  // by this driver invocation. +  for (fs::directory_iterator File(CrashDiagDir, EC), FileEnd; +       File != FileEnd && !EC; File.increment(EC)) { +    StringRef FileName = path::filename(File->path()); +    if (!FileName.startswith(Name)) +      continue; +    if (fs::status(File->path(), FileStatus)) +      continue; +    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CrashFile = +        llvm::MemoryBuffer::getFile(File->path()); +    if (!CrashFile) +      continue; +    // The first line should start with "Process:", otherwise this isn't a real +    // .crash file. +    StringRef Data = CrashFile.get()->getBuffer(); +    if (!Data.startswith("Process:")) +      continue; +    // Parse parent process pid line, e.g: "Parent Process: clang-4.0 [79141]" +    size_t ParentProcPos = Data.find("Parent Process:"); +    if (ParentProcPos == StringRef::npos) +      continue; +    size_t LineEnd = Data.find_first_of("\n", ParentProcPos); +    if (LineEnd == StringRef::npos) +      continue; +    StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim(); +    int OpenBracket = -1, CloseBracket = -1; +    for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) { +      if (ParentProcess[i] == '[') +        OpenBracket = i; +      if (ParentProcess[i] == ']') +        CloseBracket = i; +    } +    // Extract the parent process PID from the .crash file and check whether +    // it matches this driver invocation pid. +    int CrashPID; +    if (OpenBracket < 0 || CloseBracket < 0 || +        ParentProcess.slice(OpenBracket + 1, CloseBracket) +            .getAsInteger(10, CrashPID) || CrashPID != PID) { +      continue; +    } + +    // Found a .crash file matching the driver pid. To avoid getting an older +    // and misleading crash file, continue looking for the most recent. +    // FIXME: the driver can dispatch multiple cc1 invocations, leading to +    // multiple crashes poiting to the same parent process. Since the driver +    // does not collect pid information for the dispatched invocation there's +    // currently no way to distinguish among them. +    const auto FileAccessTime = FileStatus.getLastModificationTime(); +    if (FileAccessTime > LastAccessTime) { +      CrashFilePath.assign(File->path()); +      LastAccessTime = FileAccessTime; +    } +  } + +  // If found, copy it over to the location of other reproducer files. +  if (!CrashFilePath.empty()) { +    EC = fs::copy_file(CrashFilePath, ReproCrashFilename); +    if (EC) +      return false; +    return true; +  } + +  return false; +} +  // When clang crashes, produce diagnostic information including the fully  // preprocessed source file(s).  Request that the developer attach the  // diagnostic information to a bug report. @@ -737,8 +918,13 @@ void Driver::generateCompilationDiagnostics(Compilation &C,           "Preprocessed source(s) and associated run script(s) are located at:";    SmallString<128> VFS; +  SmallString<128> ReproCrashFilename;    for (const char *TempFile : TempFiles) {      Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile; +    if (ReproCrashFilename.empty()) { +      ReproCrashFilename = TempFile; +      llvm::sys::path::replace_extension(ReproCrashFilename, ".crash"); +    }      if (StringRef(TempFile).endswith(".cache")) {        // In some cases (modules) we'll dump extra data to help with reproducing        // the crash into a directory next to the output. @@ -766,6 +952,24 @@ void Driver::generateCompilationDiagnostics(Compilation &C,      Diag(clang::diag::note_drv_command_failed_diag_msg) << Script;    } +  // On darwin, provide information about the .crash diagnostic report. +  if (llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin()) { +    SmallString<128> CrashDiagDir; +    if (getCrashDiagnosticFile(ReproCrashFilename, CrashDiagDir)) { +      Diag(clang::diag::note_drv_command_failed_diag_msg) +          << ReproCrashFilename.str(); +    } else { // Suggest a directory for the user to look for .crash files. +      llvm::sys::path::append(CrashDiagDir, Name); +      CrashDiagDir += "_<YYYY-MM-DD-HHMMSS>_<hostname>.crash"; +      Diag(clang::diag::note_drv_command_failed_diag_msg) +          << "Crash backtrace is located in"; +      Diag(clang::diag::note_drv_command_failed_diag_msg) +          << CrashDiagDir.str(); +      Diag(clang::diag::note_drv_command_failed_diag_msg) +          << "(choose the .crash file that corresponds to your crash)"; +    } +  } +    for (const auto &A : C.getArgs().filtered(options::OPT_frewrite_map_file,                                              options::OPT_frewrite_map_file_EQ))      Diag(clang::diag::note_drv_command_failed_diag_msg) << A->getValue(); @@ -783,8 +987,7 @@ void Driver::setUpResponseFiles(Compilation &C, Command &Cmd) {      return;    std::string TmpName = GetTemporaryPath("response", "txt"); -  Cmd.setResponseFile( -      C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()))); +  Cmd.setResponseFile(C.addTempFile(C.getArgs().MakeArgString(TmpName)));  }  int Driver::ExecuteCompilation( @@ -982,7 +1185,15 @@ bool Driver::HandleImmediateArgs(const Compilation &C) {    }    if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) { -    llvm::outs() << GetFilePath("libgcc.a", TC) << "\n"; +    ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs()); +    switch (RLT) { +    case ToolChain::RLT_CompilerRT: +      llvm::outs() << TC.getCompilerRT(C.getArgs(), "builtins") << "\n"; +      break; +    case ToolChain::RLT_Libgcc: +      llvm::outs() << GetFilePath("libgcc.a", TC) << "\n"; +      break; +    }      return false;    } @@ -1388,132 +1599,745 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,    }  } -// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE -// input action and then wraps each in CudaDeviceAction paired with -// appropriate GPU arch name. In case of partial (i.e preprocessing -// only) or device-only compilation, each device action is added to /p -// Actions and /p Current is released. Otherwise the function creates -// and returns a new CudaHostAction which wraps /p Current and device -// side actions. -static Action *buildCudaActions(Compilation &C, DerivedArgList &Args, -                                const Arg *InputArg, Action *HostAction, -                                ActionList &Actions) { -  Arg *PartialCompilationArg = Args.getLastArg( -      options::OPT_cuda_host_only, options::OPT_cuda_device_only, -      options::OPT_cuda_compile_host_device); -  bool CompileHostOnly = -      PartialCompilationArg && -      PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only); -  bool CompileDeviceOnly = -      PartialCompilationArg && -      PartialCompilationArg->getOption().matches(options::OPT_cuda_device_only); - -  if (CompileHostOnly) { +namespace { +/// Provides a convenient interface for different programming models to generate +/// the required device actions. +class OffloadingActionBuilder final { +  /// Flag used to trace errors in the builder. +  bool IsValid = false; + +  /// The compilation that is using this builder. +  Compilation &C; + +  /// Map between an input argument and the offload kinds used to process it. +  std::map<const Arg *, unsigned> InputArgToOffloadKindMap; + +  /// Builder interface. It doesn't build anything or keep any state. +  class DeviceActionBuilder { +  public: +    typedef llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PhasesTy; + +    enum ActionBuilderReturnCode { +      // The builder acted successfully on the current action. +      ABRT_Success, +      // The builder didn't have to act on the current action. +      ABRT_Inactive, +      // The builder was successful and requested the host action to not be +      // generated. +      ABRT_Ignore_Host, +    }; + +  protected: +    /// Compilation associated with this builder. +    Compilation &C; + +    /// Tool chains associated with this builder. The same programming +    /// model may have associated one or more tool chains. +    SmallVector<const ToolChain *, 2> ToolChains; + +    /// The derived arguments associated with this builder. +    DerivedArgList &Args; + +    /// The inputs associated with this builder. +    const Driver::InputList &Inputs; + +    /// The associated offload kind. +    Action::OffloadKind AssociatedOffloadKind = Action::OFK_None; + +  public: +    DeviceActionBuilder(Compilation &C, DerivedArgList &Args, +                        const Driver::InputList &Inputs, +                        Action::OffloadKind AssociatedOffloadKind) +        : C(C), Args(Args), Inputs(Inputs), +          AssociatedOffloadKind(AssociatedOffloadKind) {} +    virtual ~DeviceActionBuilder() {} + +    /// Fill up the array \a DA with all the device dependences that should be +    /// added to the provided host action \a HostAction. By default it is +    /// inactive. +    virtual ActionBuilderReturnCode +    getDeviceDependences(OffloadAction::DeviceDependences &DA, +                         phases::ID CurPhase, phases::ID FinalPhase, +                         PhasesTy &Phases) { +      return ABRT_Inactive; +    } + +    /// Update the state to include the provided host action \a HostAction as a +    /// dependency of the current device action. By default it is inactive. +    virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction) { +      return ABRT_Inactive; +    } + +    /// Append top level actions generated by the builder. Return true if errors +    /// were found. +    virtual void appendTopLevelActions(ActionList &AL) {} + +    /// Append linker actions generated by the builder. Return true if errors +    /// were found. +    virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {} + +    /// Initialize the builder. Return true if any initialization errors are +    /// found. +    virtual bool initialize() { return false; } + +    /// Return true if the builder can use bundling/unbundling. +    virtual bool canUseBundlerUnbundler() const { return false; } + +    /// Return true if this builder is valid. We have a valid builder if we have +    /// associated device tool chains. +    bool isValid() { return !ToolChains.empty(); } + +    /// Return the associated offload kind. +    Action::OffloadKind getAssociatedOffloadKind() { +      return AssociatedOffloadKind; +    } +  }; + +  /// \brief CUDA action builder. It injects device code in the host backend +  /// action. +  class CudaActionBuilder final : public DeviceActionBuilder { +    /// Flags to signal if the user requested host-only or device-only +    /// compilation. +    bool CompileHostOnly = false; +    bool CompileDeviceOnly = false; + +    /// List of GPU architectures to use in this compilation. +    SmallVector<CudaArch, 4> GpuArchList; + +    /// The CUDA actions for the current input. +    ActionList CudaDeviceActions; + +    /// The CUDA fat binary if it was generated for the current input. +    Action *CudaFatBinary = nullptr; + +    /// Flag that is set to true if this builder acted on the current input. +    bool IsActive = false; + +  public: +    CudaActionBuilder(Compilation &C, DerivedArgList &Args, +                      const Driver::InputList &Inputs) +        : DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {} + +    ActionBuilderReturnCode +    getDeviceDependences(OffloadAction::DeviceDependences &DA, +                         phases::ID CurPhase, phases::ID FinalPhase, +                         PhasesTy &Phases) override { +      if (!IsActive) +        return ABRT_Inactive; + +      // If we don't have more CUDA actions, we don't have any dependences to +      // create for the host. +      if (CudaDeviceActions.empty()) +        return ABRT_Success; + +      assert(CudaDeviceActions.size() == GpuArchList.size() && +             "Expecting one action per GPU architecture."); +      assert(!CompileHostOnly && +             "Not expecting CUDA actions in host-only compilation."); + +      // If we are generating code for the device or we are in a backend phase, +      // we attempt to generate the fat binary. We compile each arch to ptx and +      // assemble to cubin, then feed the cubin *and* the ptx into a device +      // "link" action, which uses fatbinary to combine these cubins into one +      // fatbin.  The fatbin is then an input to the host action if not in +      // device-only mode. +      if (CompileDeviceOnly || CurPhase == phases::Backend) { +        ActionList DeviceActions; +        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { +          // Produce the device action from the current phase up to the assemble +          // phase. +          for (auto Ph : Phases) { +            // Skip the phases that were already dealt with. +            if (Ph < CurPhase) +              continue; +            // We have to be consistent with the host final phase. +            if (Ph > FinalPhase) +              break; + +            CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction( +                C, Args, Ph, CudaDeviceActions[I]); + +            if (Ph == phases::Assemble) +              break; +          } + +          // If we didn't reach the assemble phase, we can't generate the fat +          // binary. We don't need to generate the fat binary if we are not in +          // device-only mode. +          if (!isa<AssembleJobAction>(CudaDeviceActions[I]) || +              CompileDeviceOnly) +            continue; + +          Action *AssembleAction = CudaDeviceActions[I]; +          assert(AssembleAction->getType() == types::TY_Object); +          assert(AssembleAction->getInputs().size() == 1); + +          Action *BackendAction = AssembleAction->getInputs()[0]; +          assert(BackendAction->getType() == types::TY_PP_Asm); + +          for (auto &A : {AssembleAction, BackendAction}) { +            OffloadAction::DeviceDependences DDep; +            DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]), +                     Action::OFK_Cuda); +            DeviceActions.push_back( +                C.MakeAction<OffloadAction>(DDep, A->getType())); +          } +        } + +        // We generate the fat binary if we have device input actions. +        if (!DeviceActions.empty()) { +          CudaFatBinary = +              C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN); + +          if (!CompileDeviceOnly) { +            DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr, +                   Action::OFK_Cuda); +            // Clear the fat binary, it is already a dependence to an host +            // action. +            CudaFatBinary = nullptr; +          } + +          // Remove the CUDA actions as they are already connected to an host +          // action or fat binary. +          CudaDeviceActions.clear(); +        } + +        // We avoid creating host action in device-only mode. +        return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success; +      } else if (CurPhase > phases::Backend) { +        // If we are past the backend phase and still have a device action, we +        // don't have to do anything as this action is already a device +        // top-level action. +        return ABRT_Success; +      } + +      assert(CurPhase < phases::Backend && "Generating single CUDA " +                                           "instructions should only occur " +                                           "before the backend phase!"); + +      // By default, we produce an action for each device arch. +      for (Action *&A : CudaDeviceActions) +        A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A); + +      return ABRT_Success; +    } + +    ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { +      // While generating code for CUDA, we only depend on the host input action +      // to trigger the creation of all the CUDA device actions. + +      // If we are dealing with an input action, replicate it for each GPU +      // architecture. If we are in host-only mode we return 'success' so that +      // the host uses the CUDA offload kind. +      if (auto *IA = dyn_cast<InputAction>(HostAction)) { +        assert(!GpuArchList.empty() && +               "We should have at least one GPU architecture."); + +        // If the host input is not CUDA, we don't need to bother about this +        // input. +        if (IA->getType() != types::TY_CUDA) { +          // The builder will ignore this input. +          IsActive = false; +          return ABRT_Inactive; +        } + +        // Set the flag to true, so that the builder acts on the current input. +        IsActive = true; + +        if (CompileHostOnly) +          return ABRT_Success; + +        // Replicate inputs for each GPU architecture. +        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) +          CudaDeviceActions.push_back(C.MakeAction<InputAction>( +              IA->getInputArg(), types::TY_CUDA_DEVICE)); + +        return ABRT_Success; +      } + +      return IsActive ? ABRT_Success : ABRT_Inactive; +    } + +    void appendTopLevelActions(ActionList &AL) override { +      // Utility to append actions to the top level list. +      auto AddTopLevel = [&](Action *A, CudaArch BoundArch) { +        OffloadAction::DeviceDependences Dep; +        Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch), +                Action::OFK_Cuda); +        AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType())); +      }; + +      // If we have a fat binary, add it to the list. +      if (CudaFatBinary) { +        AddTopLevel(CudaFatBinary, CudaArch::UNKNOWN); +        CudaDeviceActions.clear(); +        CudaFatBinary = nullptr; +        return; +      } + +      if (CudaDeviceActions.empty()) +        return; + +      // If we have CUDA actions at this point, that's because we have a have +      // partial compilation, so we should have an action for each GPU +      // architecture. +      assert(CudaDeviceActions.size() == GpuArchList.size() && +             "Expecting one action per GPU architecture."); +      assert(ToolChains.size() == 1 && +             "Expecting to have a sing CUDA toolchain."); +      for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) +        AddTopLevel(CudaDeviceActions[I], GpuArchList[I]); + +      CudaDeviceActions.clear(); +    } + +    bool initialize() override { +      // We don't need to support CUDA. +      if (!C.hasOffloadToolChain<Action::OFK_Cuda>()) +        return false; + +      const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); +      assert(HostTC && "No toolchain for host compilation."); +      if (HostTC->getTriple().isNVPTX()) { +        // We do not support targeting NVPTX for host compilation. Throw +        // an error and abort pipeline construction early so we don't trip +        // asserts that assume device-side compilation. +        C.getDriver().Diag(diag::err_drv_cuda_nvptx_host); +        return true; +      } + +      ToolChains.push_back(C.getSingleOffloadToolChain<Action::OFK_Cuda>()); + +      Arg *PartialCompilationArg = Args.getLastArg( +          options::OPT_cuda_host_only, options::OPT_cuda_device_only, +          options::OPT_cuda_compile_host_device); +      CompileHostOnly = PartialCompilationArg && +                        PartialCompilationArg->getOption().matches( +                            options::OPT_cuda_host_only); +      CompileDeviceOnly = PartialCompilationArg && +                          PartialCompilationArg->getOption().matches( +                              options::OPT_cuda_device_only); + +      // Collect all cuda_gpu_arch parameters, removing duplicates. +      std::set<CudaArch> GpuArchs; +      bool Error = false; +      for (Arg *A : Args) { +        if (!(A->getOption().matches(options::OPT_cuda_gpu_arch_EQ) || +              A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ))) +          continue; +        A->claim(); + +        const StringRef ArchStr = A->getValue(); +        if (A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ) && +            ArchStr == "all") { +          GpuArchs.clear(); +          continue; +        } +        CudaArch Arch = StringToCudaArch(ArchStr); +        if (Arch == CudaArch::UNKNOWN) { +          C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; +          Error = true; +        } else if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) +          GpuArchs.insert(Arch); +        else if (A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ)) +          GpuArchs.erase(Arch); +        else +          llvm_unreachable("Unexpected option."); +      } + +      // Collect list of GPUs remaining in the set. +      for (CudaArch Arch : GpuArchs) +        GpuArchList.push_back(Arch); + +      // Default to sm_20 which is the lowest common denominator for +      // supported GPUs.  sm_20 code should work correctly, if +      // suboptimally, on all newer GPUs. +      if (GpuArchList.empty()) +        GpuArchList.push_back(CudaArch::SM_20); + +      return Error; +    } +  }; + +  /// OpenMP action builder. The host bitcode is passed to the device frontend +  /// and all the device linked images are passed to the host link phase. +  class OpenMPActionBuilder final : public DeviceActionBuilder { +    /// The OpenMP actions for the current input. +    ActionList OpenMPDeviceActions; + +    /// The linker inputs obtained for each toolchain. +    SmallVector<ActionList, 8> DeviceLinkerInputs; + +  public: +    OpenMPActionBuilder(Compilation &C, DerivedArgList &Args, +                        const Driver::InputList &Inputs) +        : DeviceActionBuilder(C, Args, Inputs, Action::OFK_OpenMP) {} + +    ActionBuilderReturnCode +    getDeviceDependences(OffloadAction::DeviceDependences &DA, +                         phases::ID CurPhase, phases::ID FinalPhase, +                         PhasesTy &Phases) override { + +      // We should always have an action for each input. +      assert(OpenMPDeviceActions.size() == ToolChains.size() && +             "Number of OpenMP actions and toolchains do not match."); + +      // The host only depends on device action in the linking phase, when all +      // the device images have to be embedded in the host image. +      if (CurPhase == phases::Link) { +        assert(ToolChains.size() == DeviceLinkerInputs.size() && +               "Toolchains and linker inputs sizes do not match."); +        auto LI = DeviceLinkerInputs.begin(); +        for (auto *A : OpenMPDeviceActions) { +          LI->push_back(A); +          ++LI; +        } + +        // We passed the device action as a host dependence, so we don't need to +        // do anything else with them. +        OpenMPDeviceActions.clear(); +        return ABRT_Success; +      } + +      // By default, we produce an action for each device arch. +      for (Action *&A : OpenMPDeviceActions) +        A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A); + +      return ABRT_Success; +    } + +    ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { + +      // If this is an input action replicate it for each OpenMP toolchain. +      if (auto *IA = dyn_cast<InputAction>(HostAction)) { +        OpenMPDeviceActions.clear(); +        for (unsigned I = 0; I < ToolChains.size(); ++I) +          OpenMPDeviceActions.push_back( +              C.MakeAction<InputAction>(IA->getInputArg(), IA->getType())); +        return ABRT_Success; +      } + +      // If this is an unbundling action use it as is for each OpenMP toolchain. +      if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) { +        OpenMPDeviceActions.clear(); +        for (unsigned I = 0; I < ToolChains.size(); ++I) { +          OpenMPDeviceActions.push_back(UA); +          UA->registerDependentActionInfo( +              ToolChains[I], /*BoundArch=*/StringRef(), Action::OFK_OpenMP); +        } +        return ABRT_Success; +      } + +      // When generating code for OpenMP we use the host compile phase result as +      // a dependence to the device compile phase so that it can learn what +      // declarations should be emitted. However, this is not the only use for +      // the host action, so we prevent it from being collapsed. +      if (isa<CompileJobAction>(HostAction)) { +        HostAction->setCannotBeCollapsedWithNextDependentAction(); +        assert(ToolChains.size() == OpenMPDeviceActions.size() && +               "Toolchains and device action sizes do not match."); +        OffloadAction::HostDependence HDep( +            *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), +            /*BoundArch=*/nullptr, Action::OFK_OpenMP); +        auto TC = ToolChains.begin(); +        for (Action *&A : OpenMPDeviceActions) { +          assert(isa<CompileJobAction>(A)); +          OffloadAction::DeviceDependences DDep; +          DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); +          A = C.MakeAction<OffloadAction>(HDep, DDep); +          ++TC; +        } +      } +      return ABRT_Success; +    } + +    void appendTopLevelActions(ActionList &AL) override { +      if (OpenMPDeviceActions.empty()) +        return; + +      // We should always have an action for each input. +      assert(OpenMPDeviceActions.size() == ToolChains.size() && +             "Number of OpenMP actions and toolchains do not match."); + +      // Append all device actions followed by the proper offload action. +      auto TI = ToolChains.begin(); +      for (auto *A : OpenMPDeviceActions) { +        OffloadAction::DeviceDependences Dep; +        Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_OpenMP); +        AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType())); +        ++TI; +      } +      // We no longer need the action stored in this builder. +      OpenMPDeviceActions.clear(); +    } + +    void appendLinkDependences(OffloadAction::DeviceDependences &DA) override { +      assert(ToolChains.size() == DeviceLinkerInputs.size() && +             "Toolchains and linker inputs sizes do not match."); + +      // Append a new link action for each device. +      auto TC = ToolChains.begin(); +      for (auto &LI : DeviceLinkerInputs) { +        auto *DeviceLinkAction = +            C.MakeAction<LinkJobAction>(LI, types::TY_Image); +        DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr, +               Action::OFK_OpenMP); +        ++TC; +      } +    } + +    bool initialize() override { +      // Get the OpenMP toolchains. If we don't get any, the action builder will +      // know there is nothing to do related to OpenMP offloading. +      auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>(); +      for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; +           ++TI) +        ToolChains.push_back(TI->second); + +      DeviceLinkerInputs.resize(ToolChains.size()); +      return false; +    } + +    bool canUseBundlerUnbundler() const override { +      // OpenMP should use bundled files whenever possible. +      return true; +    } +  }; + +  /// +  /// TODO: Add the implementation for other specialized builders here. +  /// + +  /// Specialized builders being used by this offloading action builder. +  SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders; + +  /// Flag set to true if all valid builders allow file bundling/unbundling. +  bool CanUseBundler; + +public: +  OffloadingActionBuilder(Compilation &C, DerivedArgList &Args, +                          const Driver::InputList &Inputs) +      : C(C) { +    // Create a specialized builder for each device toolchain. + +    IsValid = true; + +    // Create a specialized builder for CUDA. +    SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs)); + +    // Create a specialized builder for OpenMP. +    SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs)); + +    // +    // TODO: Build other specialized builders here. +    // + +    // Initialize all the builders, keeping track of errors. If all valid +    // builders agree that we can use bundling, set the flag to true. +    unsigned ValidBuilders = 0u; +    unsigned ValidBuildersSupportingBundling = 0u; +    for (auto *SB : SpecializedBuilders) { +      IsValid = IsValid && !SB->initialize(); + +      // Update the counters if the builder is valid. +      if (SB->isValid()) { +        ++ValidBuilders; +        if (SB->canUseBundlerUnbundler()) +          ++ValidBuildersSupportingBundling; +      } +    } +    CanUseBundler = +        ValidBuilders && ValidBuilders == ValidBuildersSupportingBundling; +  } + +  ~OffloadingActionBuilder() { +    for (auto *SB : SpecializedBuilders) +      delete SB; +  } + +  /// Generate an action that adds device dependences (if any) to a host action. +  /// If no device dependence actions exist, just return the host action \a +  /// HostAction. If an error is found or if no builder requires the host action +  /// to be generated, return nullptr. +  Action * +  addDeviceDependencesToHostAction(Action *HostAction, const Arg *InputArg, +                                   phases::ID CurPhase, phases::ID FinalPhase, +                                   DeviceActionBuilder::PhasesTy &Phases) { +    if (!IsValid) +      return nullptr; + +    if (SpecializedBuilders.empty()) +      return HostAction; + +    assert(HostAction && "Invalid host action!"); + +    OffloadAction::DeviceDependences DDeps; +    // Check if all the programming models agree we should not emit the host +    // action. Also, keep track of the offloading kinds employed. +    auto &OffloadKind = InputArgToOffloadKindMap[InputArg]; +    unsigned InactiveBuilders = 0u; +    unsigned IgnoringBuilders = 0u; +    for (auto *SB : SpecializedBuilders) { +      if (!SB->isValid()) { +        ++InactiveBuilders; +        continue; +      } + +      auto RetCode = +          SB->getDeviceDependences(DDeps, CurPhase, FinalPhase, Phases); + +      // If the builder explicitly says the host action should be ignored, +      // we need to increment the variable that tracks the builders that request +      // the host object to be ignored. +      if (RetCode == DeviceActionBuilder::ABRT_Ignore_Host) +        ++IgnoringBuilders; + +      // Unless the builder was inactive for this action, we have to record the +      // offload kind because the host will have to use it. +      if (RetCode != DeviceActionBuilder::ABRT_Inactive) +        OffloadKind |= SB->getAssociatedOffloadKind(); +    } + +    // If all builders agree that the host object should be ignored, just return +    // nullptr. +    if (IgnoringBuilders && +        SpecializedBuilders.size() == (InactiveBuilders + IgnoringBuilders)) +      return nullptr; + +    if (DDeps.getActions().empty()) +      return HostAction; + +    // We have dependences we need to bundle together. We use an offload action +    // for that.      OffloadAction::HostDependence HDep(          *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), -        /*BoundArch=*/nullptr, Action::OFK_Cuda); -    return C.MakeAction<OffloadAction>(HDep); +        /*BoundArch=*/nullptr, DDeps); +    return C.MakeAction<OffloadAction>(HDep, DDeps);    } -  // Collect all cuda_gpu_arch parameters, removing duplicates. -  SmallVector<CudaArch, 4> GpuArchList; -  llvm::SmallSet<CudaArch, 4> GpuArchs; -  for (Arg *A : Args) { -    if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) -      continue; -    A->claim(); +  /// Generate an action that adds a host dependence to a device action. The +  /// results will be kept in this action builder. Return true if an error was +  /// found. +  bool addHostDependenceToDeviceActions(Action *&HostAction, +                                        const Arg *InputArg) { +    if (!IsValid) +      return true; -    const auto &ArchStr = A->getValue(); -    CudaArch Arch = StringToCudaArch(ArchStr); -    if (Arch == CudaArch::UNKNOWN) -      C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; -    else if (GpuArchs.insert(Arch).second) -      GpuArchList.push_back(Arch); -  } - -  // Default to sm_20 which is the lowest common denominator for supported GPUs. -  // sm_20 code should work correctly, if suboptimally, on all newer GPUs. -  if (GpuArchList.empty()) -    GpuArchList.push_back(CudaArch::SM_20); - -  // Replicate inputs for each GPU architecture. -  Driver::InputList CudaDeviceInputs; -  for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) -    CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg)); - -  // Build actions for all device inputs. -  ActionList CudaDeviceActions; -  C.getDriver().BuildActions(C, Args, CudaDeviceInputs, CudaDeviceActions); -  assert(GpuArchList.size() == CudaDeviceActions.size() && -         "Failed to create actions for all devices"); - -  // Check whether any of device actions stopped before they could generate PTX. -  bool PartialCompilation = -      llvm::any_of(CudaDeviceActions, [](const Action *a) { -        return a->getKind() != Action::AssembleJobClass; -      }); +    // If we are supporting bundling/unbundling and the current action is an +    // input action of non-source file, we replace the host action by the +    // unbundling action. The bundler tool has the logic to detect if an input +    // is a bundle or not and if the input is not a bundle it assumes it is a +    // host file. Therefore it is safe to create an unbundling action even if +    // the input is not a bundle. +    if (CanUseBundler && isa<InputAction>(HostAction) && +        InputArg->getOption().getKind() == llvm::opt::Option::InputClass && +        !types::isSrcFile(HostAction->getType())) { +      auto UnbundlingHostAction = +          C.MakeAction<OffloadUnbundlingJobAction>(HostAction); +      UnbundlingHostAction->registerDependentActionInfo( +          C.getSingleOffloadToolChain<Action::OFK_Host>(), +          /*BoundArch=*/StringRef(), Action::OFK_Host); +      HostAction = UnbundlingHostAction; +    } -  const ToolChain *CudaTC = C.getSingleOffloadToolChain<Action::OFK_Cuda>(); +    assert(HostAction && "Invalid host action!"); -  // Figure out what to do with device actions -- pass them as inputs to the -  // host action or run each of them independently. -  if (PartialCompilation || CompileDeviceOnly) { -    // In case of partial or device-only compilation results of device actions -    // are not consumed by the host action device actions have to be added to -    // top-level actions list with AtTopLevel=true and run independently. +    // Register the offload kinds that are used. +    auto &OffloadKind = InputArgToOffloadKindMap[InputArg]; +    for (auto *SB : SpecializedBuilders) { +      if (!SB->isValid()) +        continue; -    // -o is ambiguous if we have more than one top-level action. -    if (Args.hasArg(options::OPT_o) && -        (!CompileDeviceOnly || GpuArchList.size() > 1)) { -      C.getDriver().Diag( -          clang::diag::err_drv_output_argument_with_multiple_files); -      return nullptr; +      auto RetCode = SB->addDeviceDepences(HostAction); + +      // Host dependences for device actions are not compatible with that same +      // action being ignored. +      assert(RetCode != DeviceActionBuilder::ABRT_Ignore_Host && +             "Host dependence not expected to be ignored.!"); + +      // Unless the builder was inactive for this action, we have to record the +      // offload kind because the host will have to use it. +      if (RetCode != DeviceActionBuilder::ABRT_Inactive) +        OffloadKind |= SB->getAssociatedOffloadKind();      } -    for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { -      OffloadAction::DeviceDependences DDep; -      DDep.add(*CudaDeviceActions[I], *CudaTC, CudaArchToString(GpuArchList[I]), -               Action::OFK_Cuda); -      Actions.push_back( -          C.MakeAction<OffloadAction>(DDep, CudaDeviceActions[I]->getType())); +    return false; +  } + +  /// Add the offloading top level actions to the provided action list. This +  /// function can replace the host action by a bundling action if the +  /// programming models allow it. +  bool appendTopLevelActions(ActionList &AL, Action *HostAction, +                             const Arg *InputArg) { +    // Get the device actions to be appended. +    ActionList OffloadAL; +    for (auto *SB : SpecializedBuilders) { +      if (!SB->isValid()) +        continue; +      SB->appendTopLevelActions(OffloadAL);      } -    // Kill host action in case of device-only compilation. -    if (CompileDeviceOnly) -      return nullptr; -    return HostAction; -  } - -  // If we're not a partial or device-only compilation, we compile each arch to -  // ptx and assemble to cubin, then feed the cubin *and* the ptx into a device -  // "link" action, which uses fatbinary to combine these cubins into one -  // fatbin.  The fatbin is then an input to the host compilation. -  ActionList DeviceActions; -  for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { -    Action* AssembleAction = CudaDeviceActions[I]; -    assert(AssembleAction->getType() == types::TY_Object); -    assert(AssembleAction->getInputs().size() == 1); - -    Action* BackendAction = AssembleAction->getInputs()[0]; -    assert(BackendAction->getType() == types::TY_PP_Asm); - -    for (auto &A : {AssembleAction, BackendAction}) { -      OffloadAction::DeviceDependences DDep; -      DDep.add(*A, *CudaTC, CudaArchToString(GpuArchList[I]), Action::OFK_Cuda); -      DeviceActions.push_back(C.MakeAction<OffloadAction>(DDep, A->getType())); -    } -  } -  auto FatbinAction = -      C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN); - -  // Return a new host action that incorporates original host action and all -  // device actions. -  OffloadAction::HostDependence HDep( -      *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), -      /*BoundArch=*/nullptr, Action::OFK_Cuda); -  OffloadAction::DeviceDependences DDep; -  DDep.add(*FatbinAction, *CudaTC, /*BoundArch=*/nullptr, Action::OFK_Cuda); -  return C.MakeAction<OffloadAction>(HDep, DDep); -} + +    // If we can use the bundler, replace the host action by the bundling one in +    // the resulting list. Otherwise, just append the device actions. +    if (CanUseBundler && !OffloadAL.empty()) { +      // Add the host action to the list in order to create the bundling action. +      OffloadAL.push_back(HostAction); + +      // We expect that the host action was just appended to the action list +      // before this method was called. +      assert(HostAction == AL.back() && "Host action not in the list??"); +      HostAction = C.MakeAction<OffloadBundlingJobAction>(OffloadAL); +      AL.back() = HostAction; +    } else +      AL.append(OffloadAL.begin(), OffloadAL.end()); + +    // Propagate to the current host action (if any) the offload information +    // associated with the current input. +    if (HostAction) +      HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg], +                                           /*BoundArch=*/nullptr); +    return false; +  } + +  /// Processes the host linker action. This currently consists of replacing it +  /// with an offload action if there are device link objects and propagate to +  /// the host action all the offload kinds used in the current compilation. The +  /// resulting action is returned. +  Action *processHostLinkAction(Action *HostAction) { +    // Add all the dependences from the device linking actions. +    OffloadAction::DeviceDependences DDeps; +    for (auto *SB : SpecializedBuilders) { +      if (!SB->isValid()) +        continue; + +      SB->appendLinkDependences(DDeps); +    } + +    // Calculate all the offload kinds used in the current compilation. +    unsigned ActiveOffloadKinds = 0u; +    for (auto &I : InputArgToOffloadKindMap) +      ActiveOffloadKinds |= I.second; + +    // If we don't have device dependencies, we don't have to create an offload +    // action. +    if (DDeps.getActions().empty()) { +      // Propagate all the active kinds to host action. Given that it is a link +      // action it is assumed to depend on all actions generated so far. +      HostAction->propagateHostOffloadInfo(ActiveOffloadKinds, +                                           /*BoundArch=*/nullptr); +      return HostAction; +    } + +    // Create the offload action with all dependences. When an offload action +    // is created the kinds are propagated to the host action, so we don't have +    // to do that explicitly here. +    OffloadAction::HostDependence HDep( +        *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), +        /*BoundArch*/ nullptr, ActiveOffloadKinds); +    return C.MakeAction<OffloadAction>(HDep, DDeps); +  } +}; +} // anonymous namespace.  void Driver::BuildActions(Compilation &C, DerivedArgList &Args,                            const InputList &Inputs, ActionList &Actions) const { @@ -1621,8 +2445,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,      YcArg = YuArg = nullptr;    } -  // Track the host offload kinds used on this compilation. -  unsigned CompilationActiveOffloadHostKinds = 0u; +  // Builder to be used to build offloading actions. +  OffloadingActionBuilder OffloadBuilder(C, Args, Inputs);    // Construct the actions to perform.    ActionList LinkerInputs; @@ -1670,12 +2494,14 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,      if (YcArg) {        // Add a separate precompile phase for the compile phase.        if (FinalPhase >= phases::Compile) { +        const types::ID HeaderType = lookupHeaderTypeForSourceType(InputType);          llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PCHPL; -        types::getCompilationPhases(types::TY_CXXHeader, PCHPL); +        types::getCompilationPhases(HeaderType, PCHPL);          Arg *PchInputArg = MakeInputArg(Args, Opts, YcArg->getValue());          // Build the pipeline for the pch file. -        Action *ClangClPch = C.MakeAction<InputAction>(*PchInputArg, InputType); +        Action *ClangClPch = +            C.MakeAction<InputAction>(*PchInputArg, HeaderType);          for (phases::ID Phase : PCHPL)            ClangClPch = ConstructPhaseAction(C, Args, Phase, ClangClPch);          assert(ClangClPch); @@ -1686,17 +2512,14 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,        }      } -    phases::ID CudaInjectionPhase = -        (phases::Compile < FinalPhase && -         llvm::find(PL, phases::Compile) != PL.end()) -            ? phases::Compile -            : FinalPhase; - -    // Track the host offload kinds used on this input. -    unsigned InputActiveOffloadHostKinds = 0u; -      // Build the pipeline for this file.      Action *Current = C.MakeAction<InputAction>(*InputArg, InputType); + +    // Use the current host action in any of the offloading actions, if +    // required. +    if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) +      break; +      for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end();           i != e; ++i) {        phases::ID Phase = *i; @@ -1705,6 +2528,12 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,        if (Phase > FinalPhase)          break; +      // Add any offload action the host action depends on. +      Current = OffloadBuilder.addDeviceDependencesToHostAction( +          Current, InputArg, Phase, FinalPhase, PL); +      if (!Current) +        break; +        // Queue linker inputs.        if (Phase == phases::Link) {          assert((i + 1) == e && "linking must be final compilation step."); @@ -1713,48 +2542,37 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,          break;        } -      // Some types skip the assembler phase (e.g., llvm-bc), but we can't -      // encode this in the steps because the intermediate type depends on -      // arguments. Just special case here. -      if (Phase == phases::Assemble && Current->getType() != types::TY_PP_Asm) -        continue; -        // Otherwise construct the appropriate action. -      Current = ConstructPhaseAction(C, Args, Phase, Current); +      auto *NewCurrent = ConstructPhaseAction(C, Args, Phase, Current); -      if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) { -        Current = buildCudaActions(C, Args, InputArg, Current, Actions); -        if (!Current) -          break; +      // We didn't create a new action, so we will just move to the next phase. +      if (NewCurrent == Current) +        continue; -        // We produced a CUDA action for this input, so the host has to support -        // CUDA. -        InputActiveOffloadHostKinds |= Action::OFK_Cuda; -        CompilationActiveOffloadHostKinds |= Action::OFK_Cuda; -      } +      Current = NewCurrent; + +      // Use the current host action in any of the offloading actions, if +      // required. +      if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) +        break;        if (Current->getType() == types::TY_Nothing)          break;      } -    // If we ended with something, add to the output list. Also, propagate the -    // offload information to the top-level host action related with the current -    // input. -    if (Current) { -      if (InputActiveOffloadHostKinds) -        Current->propagateHostOffloadInfo(InputActiveOffloadHostKinds, -                                          /*BoundArch=*/nullptr); +    // If we ended with something, add to the output list. +    if (Current)        Actions.push_back(Current); -    } + +    // Add any top level actions generated for offloading. +    OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg);    } -  // Add a link action if necessary and propagate the offload information for -  // the current compilation. +  // Add a link action if necessary.    if (!LinkerInputs.empty()) { -    Actions.push_back( -        C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image)); -    Actions.back()->propagateHostOffloadInfo(CompilationActiveOffloadHostKinds, -                                             /*BoundArch=*/nullptr); +    Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image); +    LA = OffloadBuilder.processHostLinkAction(LA); +    Actions.push_back(LA);    }    // If we are linking, claim any options which are obviously only used for @@ -1776,6 +2594,13 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,  Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,                                       phases::ID Phase, Action *Input) const {    llvm::PrettyStackTraceString CrashInfo("Constructing phase actions"); + +  // Some types skip the assembler phase (e.g., llvm-bc), but we can't +  // encode this in the steps because the intermediate type depends on +  // arguments. Just special case here. +  if (Phase == phases::Assemble && Input->getType() != types::TY_PP_Asm) +    return Input; +    // Build the appropriate action.    switch (Phase) {    case phases::Link: @@ -1797,7 +2622,9 @@ Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,      return C.MakeAction<PreprocessJobAction>(Input, OutputTy);    }    case phases::Precompile: { -    types::ID OutputTy = types::TY_PCH; +    types::ID OutputTy = getPrecompiledType(Input->getType()); +    assert(OutputTy != types::TY_INVALID && +           "Cannot precompile this input type!");      if (Args.hasArg(options::OPT_fsyntax_only)) {        // Syntax checks should not emit a PCH file        OutputTy = types::TY_Nothing; @@ -1888,11 +2715,11 @@ void Driver::BuildJobs(Compilation &C) const {      }      BuildJobsForAction(C, A, &C.getDefaultToolChain(), -                       /*BoundArch*/ nullptr, +                       /*BoundArch*/ StringRef(),                         /*AtTopLevel*/ true,                         /*MultipleArchs*/ ArchNames.size() > 1,                         /*LinkingOutput*/ LinkingOutput, CachedResults, -                       /*BuildForOffloadDevice*/ false); +                       /*TargetDeviceOffloadKind*/ Action::OFK_None);    }    // If the user passed -Qunused-arguments or there were errors, don't warn @@ -1941,177 +2768,335 @@ void Driver::BuildJobs(Compilation &C) const {      }    }  } -/// Collapse an offloading action looking for a job of the given type. The input -/// action is changed to the input of the collapsed sequence. If we effectively -/// had a collapse return the corresponding offloading action, otherwise return -/// null. -template <typename T> -static OffloadAction *collapseOffloadingAction(Action *&CurAction) { -  if (!CurAction) -    return nullptr; -  if (auto *OA = dyn_cast<OffloadAction>(CurAction)) { -    if (OA->hasHostDependence()) -      if (auto *HDep = dyn_cast<T>(OA->getHostDependence())) { -        CurAction = HDep; -        return OA; -      } -    if (OA->hasSingleDeviceDependence()) -      if (auto *DDep = dyn_cast<T>(OA->getSingleDeviceDependence())) { -        CurAction = DDep; -        return OA; + +namespace { +/// Utility class to control the collapse of dependent actions and select the +/// tools accordingly. +class ToolSelector final { +  /// The tool chain this selector refers to. +  const ToolChain &TC; + +  /// The compilation this selector refers to. +  const Compilation &C; + +  /// The base action this selector refers to. +  const JobAction *BaseAction; + +  /// Set to true if the current toolchain refers to host actions. +  bool IsHostSelector; + +  /// Set to true if save-temps and embed-bitcode functionalities are active. +  bool SaveTemps; +  bool EmbedBitcode; + +  /// Get previous dependent action or null if that does not exist. If +  /// \a CanBeCollapsed is false, that action must be legal to collapse or +  /// null will be returned. +  const JobAction *getPrevDependentAction(const ActionList &Inputs, +                                          ActionList &SavedOffloadAction, +                                          bool CanBeCollapsed = true) { +    // An option can be collapsed only if it has a single input. +    if (Inputs.size() != 1) +      return nullptr; + +    Action *CurAction = *Inputs.begin(); +    if (CanBeCollapsed && +        !CurAction->isCollapsingWithNextDependentActionLegal()) +      return nullptr; + +    // If the input action is an offload action. Look through it and save any +    // offload action that can be dropped in the event of a collapse. +    if (auto *OA = dyn_cast<OffloadAction>(CurAction)) { +      // If the dependent action is a device action, we will attempt to collapse +      // only with other device actions. Otherwise, we would do the same but +      // with host actions only. +      if (!IsHostSelector) { +        if (OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)) { +          CurAction = +              OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true); +          if (CanBeCollapsed && +              !CurAction->isCollapsingWithNextDependentActionLegal()) +            return nullptr; +          SavedOffloadAction.push_back(OA); +          return dyn_cast<JobAction>(CurAction); +        } +      } else if (OA->hasHostDependence()) { +        CurAction = OA->getHostDependence(); +        if (CanBeCollapsed && +            !CurAction->isCollapsingWithNextDependentActionLegal()) +          return nullptr; +        SavedOffloadAction.push_back(OA); +        return dyn_cast<JobAction>(CurAction);        } -  } -  return nullptr; -} -// Returns a Tool for a given JobAction.  In case the action and its -// predecessors can be combined, updates Inputs with the inputs of the -// first combined action. If one of the collapsed actions is a -// CudaHostAction, updates CollapsedCHA with the pointer to it so the -// caller can deal with extra handling such action requires. -static const Tool *selectToolForJob(Compilation &C, bool SaveTemps, -                                    bool EmbedBitcode, const ToolChain *TC, -                                    const JobAction *JA, -                                    const ActionList *&Inputs, -                                    ActionList &CollapsedOffloadAction) { -  const Tool *ToolForJob = nullptr; -  CollapsedOffloadAction.clear(); - -  // See if we should look for a compiler with an integrated assembler. We match -  // bottom up, so what we are actually looking for is an assembler job with a -  // compiler input. - -  // Look through offload actions between assembler and backend actions. -  Action *BackendJA = (isa<AssembleJobAction>(JA) && Inputs->size() == 1) -                          ? *Inputs->begin() -                          : nullptr; -  auto *BackendOA = collapseOffloadingAction<BackendJobAction>(BackendJA); - -  if (TC->useIntegratedAs() && !SaveTemps && -      !C.getArgs().hasArg(options::OPT_via_file_asm) && -      !C.getArgs().hasArg(options::OPT__SLASH_FA) && -      !C.getArgs().hasArg(options::OPT__SLASH_Fa) && BackendJA && -      isa<BackendJobAction>(BackendJA)) { -    // A BackendJob is always preceded by a CompileJob, and without -save-temps -    // or -fembed-bitcode, they will always get combined together, so instead of -    // checking the backend tool, check if the tool for the CompileJob has an -    // integrated assembler. For -fembed-bitcode, CompileJob is still used to -    // look up tools for BackendJob, but they need to match before we can split -    // them. - -    // Look through offload actions between backend and compile actions. -    Action *CompileJA = *BackendJA->getInputs().begin(); -    auto *CompileOA = collapseOffloadingAction<CompileJobAction>(CompileJA); - -    assert(CompileJA && isa<CompileJobAction>(CompileJA) && -           "Backend job is not preceeded by compile job."); -    const Tool *Compiler = TC->SelectTool(*cast<CompileJobAction>(CompileJA)); -    if (!Compiler)        return nullptr; +    } + +    return dyn_cast<JobAction>(CurAction); +  } + +  /// Return true if an assemble action can be collapsed. +  bool canCollapseAssembleAction() const { +    return TC.useIntegratedAs() && !SaveTemps && +           !C.getArgs().hasArg(options::OPT_via_file_asm) && +           !C.getArgs().hasArg(options::OPT__SLASH_FA) && +           !C.getArgs().hasArg(options::OPT__SLASH_Fa); +  } + +  /// Return true if a preprocessor action can be collapsed. +  bool canCollapsePreprocessorAction() const { +    return !C.getArgs().hasArg(options::OPT_no_integrated_cpp) && +           !C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps && +           !C.getArgs().hasArg(options::OPT_rewrite_objc); +  } + +  /// Struct that relates an action with the offload actions that would be +  /// collapsed with it. +  struct JobActionInfo final { +    /// The action this info refers to. +    const JobAction *JA = nullptr; +    /// The offload actions we need to take care off if this action is +    /// collapsed. +    ActionList SavedOffloadAction; +  }; + +  /// Append collapsed offload actions from the give nnumber of elements in the +  /// action info array. +  static void AppendCollapsedOffloadAction(ActionList &CollapsedOffloadAction, +                                           ArrayRef<JobActionInfo> &ActionInfo, +                                           unsigned ElementNum) { +    assert(ElementNum <= ActionInfo.size() && "Invalid number of elements."); +    for (unsigned I = 0; I < ElementNum; ++I) +      CollapsedOffloadAction.append(ActionInfo[I].SavedOffloadAction.begin(), +                                    ActionInfo[I].SavedOffloadAction.end()); +  } + +  /// Functions that attempt to perform the combining. They detect if that is +  /// legal, and if so they update the inputs \a Inputs and the offload action +  /// that were collapsed in \a CollapsedOffloadAction. A tool that deals with +  /// the combined action is returned. If the combining is not legal or if the +  /// tool does not exist, null is returned. +  /// Currently three kinds of collapsing are supported: +  ///  - Assemble + Backend + Compile; +  ///  - Assemble + Backend ; +  ///  - Backend + Compile. +  const Tool * +  combineAssembleBackendCompile(ArrayRef<JobActionInfo> ActionInfo, +                                const ActionList *&Inputs, +                                ActionList &CollapsedOffloadAction) { +    if (ActionInfo.size() < 3 || !canCollapseAssembleAction()) +      return nullptr; +    auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA); +    auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA); +    auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[2].JA); +    if (!AJ || !BJ || !CJ) +      return nullptr; + +    // Get compiler tool. +    const Tool *T = TC.SelectTool(*CJ); +    if (!T) +      return nullptr; +      // When using -fembed-bitcode, it is required to have the same tool (clang)      // for both CompilerJA and BackendJA. Otherwise, combine two stages.      if (EmbedBitcode) { -      JobAction *InputJA = cast<JobAction>(*Inputs->begin()); -      const Tool *BackendTool = TC->SelectTool(*InputJA); -      if (BackendTool == Compiler) -        CompileJA = InputJA; -    } -    if (Compiler->hasIntegratedAssembler()) { -      Inputs = &CompileJA->getInputs(); -      ToolForJob = Compiler; -      // Save the collapsed offload actions because they may still contain -      // device actions. -      if (CompileOA) -        CollapsedOffloadAction.push_back(CompileOA); -      if (BackendOA) -        CollapsedOffloadAction.push_back(BackendOA); -    } -  } - -  // A backend job should always be combined with the preceding compile job -  // unless OPT_save_temps or OPT_fembed_bitcode is enabled and the compiler is -  // capable of emitting LLVM IR as an intermediate output. -  if (isa<BackendJobAction>(JA)) { -    // Check if the compiler supports emitting LLVM IR. -    assert(Inputs->size() == 1); - -    // Look through offload actions between backend and compile actions. -    Action *CompileJA = *JA->getInputs().begin(); -    auto *CompileOA = collapseOffloadingAction<CompileJobAction>(CompileJA); - -    assert(CompileJA && isa<CompileJobAction>(CompileJA) && -           "Backend job is not preceeded by compile job."); -    const Tool *Compiler = TC->SelectTool(*cast<CompileJobAction>(CompileJA)); -    if (!Compiler) +      const Tool *BT = TC.SelectTool(*BJ); +      if (BT == T) +        return nullptr; +    } + +    if (!T->hasIntegratedAssembler())        return nullptr; -    if (!Compiler->canEmitIR() || -        (!SaveTemps && !EmbedBitcode)) { -      Inputs = &CompileJA->getInputs(); -      ToolForJob = Compiler; -      if (CompileOA) -        CollapsedOffloadAction.push_back(CompileOA); -    } +    Inputs = &CJ->getInputs(); +    AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo, +                                 /*NumElements=*/3); +    return T;    } +  const Tool *combineAssembleBackend(ArrayRef<JobActionInfo> ActionInfo, +                                     const ActionList *&Inputs, +                                     ActionList &CollapsedOffloadAction) { +    if (ActionInfo.size() < 2 || !canCollapseAssembleAction()) +      return nullptr; +    auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA); +    auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA); +    if (!AJ || !BJ) +      return nullptr; -  // Otherwise use the tool for the current job. -  if (!ToolForJob) -    ToolForJob = TC->SelectTool(*JA); +    // Retrieve the compile job, backend action must always be preceded by one. +    ActionList CompileJobOffloadActions; +    auto *CJ = getPrevDependentAction(BJ->getInputs(), CompileJobOffloadActions, +                                      /*CanBeCollapsed=*/false); +    if (!AJ || !BJ || !CJ) +      return nullptr; -  // See if we should use an integrated preprocessor. We do so when we have -  // exactly one input, since this is the only use case we care about -  // (irrelevant since we don't support combine yet). +    assert(isa<CompileJobAction>(CJ) && +           "Expecting compile job preceding backend job."); -  // Look through offload actions after preprocessing. -  Action *PreprocessJA = (Inputs->size() == 1) ? *Inputs->begin() : nullptr; -  auto *PreprocessOA = -      collapseOffloadingAction<PreprocessJobAction>(PreprocessJA); +    // Get compiler tool. +    const Tool *T = TC.SelectTool(*CJ); +    if (!T) +      return nullptr; + +    if (!T->hasIntegratedAssembler()) +      return nullptr; -  if (PreprocessJA && isa<PreprocessJobAction>(PreprocessJA) && -      !C.getArgs().hasArg(options::OPT_no_integrated_cpp) && -      !C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps && -      !C.getArgs().hasArg(options::OPT_rewrite_objc) && -      ToolForJob->hasIntegratedCPP()) { -    Inputs = &PreprocessJA->getInputs(); -    if (PreprocessOA) -      CollapsedOffloadAction.push_back(PreprocessOA); +    Inputs = &BJ->getInputs(); +    AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo, +                                 /*NumElements=*/2); +    return T;    } +  const Tool *combineBackendCompile(ArrayRef<JobActionInfo> ActionInfo, +                                    const ActionList *&Inputs, +                                    ActionList &CollapsedOffloadAction) { +    if (ActionInfo.size() < 2 || !canCollapsePreprocessorAction()) +      return nullptr; +    auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[0].JA); +    auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[1].JA); +    if (!BJ || !CJ) +      return nullptr; + +    // Get compiler tool. +    const Tool *T = TC.SelectTool(*CJ); +    if (!T) +      return nullptr; + +    if (T->canEmitIR() && (SaveTemps || EmbedBitcode)) +      return nullptr; + +    Inputs = &CJ->getInputs(); +    AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo, +                                 /*NumElements=*/2); +    return T; +  } + +  /// Updates the inputs if the obtained tool supports combining with +  /// preprocessor action, and the current input is indeed a preprocessor +  /// action. If combining results in the collapse of offloading actions, those +  /// are appended to \a CollapsedOffloadAction. +  void combineWithPreprocessor(const Tool *T, const ActionList *&Inputs, +                               ActionList &CollapsedOffloadAction) { +    if (!T || !canCollapsePreprocessorAction() || !T->hasIntegratedCPP()) +      return; + +    // Attempt to get a preprocessor action dependence. +    ActionList PreprocessJobOffloadActions; +    auto *PJ = getPrevDependentAction(*Inputs, PreprocessJobOffloadActions); +    if (!PJ || !isa<PreprocessJobAction>(PJ)) +      return; + +    // This is legal to combine. Append any offload action we found and set the +    // current inputs to preprocessor inputs. +    CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(), +                                  PreprocessJobOffloadActions.end()); +    Inputs = &PJ->getInputs(); +  } + +public: +  ToolSelector(const JobAction *BaseAction, const ToolChain &TC, +               const Compilation &C, bool SaveTemps, bool EmbedBitcode) +      : TC(TC), C(C), BaseAction(BaseAction), SaveTemps(SaveTemps), +        EmbedBitcode(EmbedBitcode) { +    assert(BaseAction && "Invalid base action."); +    IsHostSelector = BaseAction->getOffloadingDeviceKind() == Action::OFK_None; +  } + +  /// Check if a chain of actions can be combined and return the tool that can +  /// handle the combination of actions. The pointer to the current inputs \a +  /// Inputs and the list of offload actions \a CollapsedOffloadActions +  /// connected to collapsed actions are updated accordingly. The latter enables +  /// the caller of the selector to process them afterwards instead of just +  /// dropping them. If no suitable tool is found, null will be returned. +  const Tool *getTool(const ActionList *&Inputs, +                      ActionList &CollapsedOffloadAction) { +    // +    // Get the largest chain of actions that we could combine. +    // + +    SmallVector<JobActionInfo, 5> ActionChain(1); +    ActionChain.back().JA = BaseAction; +    while (ActionChain.back().JA) { +      const Action *CurAction = ActionChain.back().JA; + +      // Grow the chain by one element. +      ActionChain.resize(ActionChain.size() + 1); +      JobActionInfo &AI = ActionChain.back(); + +      // Attempt to fill it with the +      AI.JA = +          getPrevDependentAction(CurAction->getInputs(), AI.SavedOffloadAction); +    } + +    // Pop the last action info as it could not be filled. +    ActionChain.pop_back(); -  return ToolForJob; +    // +    // Attempt to combine actions. If all combining attempts failed, just return +    // the tool of the provided action. At the end we attempt to combine the +    // action with any preprocessor action it may depend on. +    // + +    const Tool *T = combineAssembleBackendCompile(ActionChain, Inputs, +                                                  CollapsedOffloadAction); +    if (!T) +      T = combineAssembleBackend(ActionChain, Inputs, CollapsedOffloadAction); +    if (!T) +      T = combineBackendCompile(ActionChain, Inputs, CollapsedOffloadAction); +    if (!T) { +      Inputs = &BaseAction->getInputs(); +      T = TC.SelectTool(*BaseAction); +    } + +    combineWithPreprocessor(T, Inputs, CollapsedOffloadAction); +    return T; +  } +};  } -InputInfo Driver::BuildJobsForAction( -    Compilation &C, const Action *A, const ToolChain *TC, const char *BoundArch, -    bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, -    std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, -    bool BuildForOffloadDevice) const { -  // The bound arch is not necessarily represented in the toolchain's triple -- -  // for example, armv7 and armv7s both map to the same triple -- so we need -  // both in our map. +/// Return a string that uniquely identifies the result of a job. The bound arch +/// is not necessarily represented in the toolchain's triple -- for example, +/// armv7 and armv7s both map to the same triple -- so we need both in our map. +/// Also, we need to add the offloading device kind, as the same tool chain can +/// be used for host and device for some programming models, e.g. OpenMP. +static std::string GetTriplePlusArchString(const ToolChain *TC, +                                           StringRef BoundArch, +                                           Action::OffloadKind OffloadKind) {    std::string TriplePlusArch = TC->getTriple().normalize(); -  if (BoundArch) { +  if (!BoundArch.empty()) {      TriplePlusArch += "-";      TriplePlusArch += BoundArch;    } -  std::pair<const Action *, std::string> ActionTC = {A, TriplePlusArch}; +  TriplePlusArch += "-"; +  TriplePlusArch += Action::GetOffloadKindName(OffloadKind); +  return TriplePlusArch; +} + +InputInfo Driver::BuildJobsForAction( +    Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, +    bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, +    std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, +    Action::OffloadKind TargetDeviceOffloadKind) const { +  std::pair<const Action *, std::string> ActionTC = { +      A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)};    auto CachedResult = CachedResults.find(ActionTC);    if (CachedResult != CachedResults.end()) {      return CachedResult->second;    }    InputInfo Result = BuildJobsForActionNoCache(        C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput, -      CachedResults, BuildForOffloadDevice); +      CachedResults, TargetDeviceOffloadKind);    CachedResults[ActionTC] = Result;    return Result;  }  InputInfo Driver::BuildJobsForActionNoCache( -    Compilation &C, const Action *A, const ToolChain *TC, const char *BoundArch, +    Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch,      bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput,      std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, -    bool BuildForOffloadDevice) const { +    Action::OffloadKind TargetDeviceOffloadKind) const {    llvm::PrettyStackTraceString CrashInfo("Building compilation jobs");    InputInfoList OffloadDependencesInputInfo; +  bool BuildingForOffloadDevice = TargetDeviceOffloadKind != Action::OFK_None;    if (const OffloadAction *OA = dyn_cast<OffloadAction>(A)) {      // The offload action is expected to be used in four different situations.      // @@ -2121,7 +3106,7 @@ InputInfo Driver::BuildJobsForActionNoCache(      // b) Set a toolchain/architecture/kind for a device action;      //    Device Action 1 -> OffloadAction -> Device Action 2      // -    // c) Specify a device dependences to a host action; +    // c) Specify a device dependence to a host action;      //    Device Action 1  _      //                      \      //      Host Action 1  ---> OffloadAction -> Host Action 2 @@ -2144,7 +3129,7 @@ InputInfo Driver::BuildJobsForActionNoCache(          DevA =              BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel,                                 /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, -                               CachedResults, /*BuildForOffloadDevice=*/true); +                               CachedResults, DepA->getOffloadingDeviceKind());        });        return DevA;      } @@ -2154,16 +3139,15 @@ InputInfo Driver::BuildJobsForActionNoCache(      // generate the host dependences and override the action with the device      // dependence. The dependences can't therefore be a top-level action.      OA->doOnEachDependence( -        /*IsHostDependence=*/BuildForOffloadDevice, +        /*IsHostDependence=*/BuildingForOffloadDevice,          [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {            OffloadDependencesInputInfo.push_back(BuildJobsForAction(                C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false,                /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults, -              /*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() != -                  Action::OFK_None)); +              DepA->getOffloadingDeviceKind()));          }); -    A = BuildForOffloadDevice +    A = BuildingForOffloadDevice              ? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)              : OA->getHostDependence();    } @@ -2182,9 +3166,9 @@ InputInfo Driver::BuildJobsForActionNoCache(    if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) {      const ToolChain *TC; -    const char *ArchName = BAA->getArchName(); +    StringRef ArchName = BAA->getArchName(); -    if (ArchName) +    if (!ArchName.empty())        TC = &getToolChain(C.getArgs(),                           computeTargetTriple(*this, DefaultTargetTriple,                                               C.getArgs(), ArchName)); @@ -2193,7 +3177,7 @@ InputInfo Driver::BuildJobsForActionNoCache(      return BuildJobsForAction(C, *BAA->input_begin(), TC, ArchName, AtTopLevel,                                MultipleArchs, LinkingOutput, CachedResults, -                              BuildForOffloadDevice); +                              TargetDeviceOffloadKind);    } @@ -2202,9 +3186,9 @@ InputInfo Driver::BuildJobsForActionNoCache(    const JobAction *JA = cast<JobAction>(A);    ActionList CollapsedOffloadActions; -  const Tool *T = -      selectToolForJob(C, isSaveTempsEnabled(), embedBitcodeEnabled(), TC, JA, -                       Inputs, CollapsedOffloadActions); +  ToolSelector TS(JA, *TC, C, isSaveTempsEnabled(), embedBitcodeInObject()); +  const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions); +    if (!T)      return InputInfo(); @@ -2212,13 +3196,12 @@ InputInfo Driver::BuildJobsForActionNoCache(    // need to build jobs for host/device-side inputs it may have held.    for (const auto *OA : CollapsedOffloadActions)      cast<OffloadAction>(OA)->doOnEachDependence( -        /*IsHostDependence=*/BuildForOffloadDevice, +        /*IsHostDependence=*/BuildingForOffloadDevice,          [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) {            OffloadDependencesInputInfo.push_back(BuildJobsForAction( -              C, DepA, DepTC, DepBoundArch, AtTopLevel, +              C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false,                /*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults, -              /*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() != -                  Action::OFK_None)); +              DepA->getOffloadingDeviceKind()));          });    // Only use pipes when there is exactly one input. @@ -2231,7 +3214,7 @@ InputInfo Driver::BuildJobsForActionNoCache(          AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A));      InputInfos.push_back(BuildJobsForAction(          C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput, -        CachedResults, BuildForOffloadDevice)); +        CachedResults, A->getOffloadingDeviceKind()));    }    // Always use the first input as the base input. @@ -2247,15 +3230,75 @@ InputInfo Driver::BuildJobsForActionNoCache(      InputInfos.append(OffloadDependencesInputInfo.begin(),                        OffloadDependencesInputInfo.end()); +  // Set the effective triple of the toolchain for the duration of this job. +  llvm::Triple EffectiveTriple; +  const ToolChain &ToolTC = T->getToolChain(); +  const ArgList &Args = +      C.getArgsForToolChain(TC, BoundArch, A->getOffloadingDeviceKind()); +  if (InputInfos.size() != 1) { +    EffectiveTriple = llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args)); +  } else { +    // Pass along the input type if it can be unambiguously determined. +    EffectiveTriple = llvm::Triple( +        ToolTC.ComputeEffectiveClangTriple(Args, InputInfos[0].getType())); +  } +  RegisterEffectiveTriple TripleRAII(ToolTC, EffectiveTriple); +    // Determine the place to write output to, if any.    InputInfo Result; -  if (JA->getType() == types::TY_Nothing) +  InputInfoList UnbundlingResults; +  if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(JA)) { +    // If we have an unbundling job, we need to create results for all the +    // outputs. We also update the results cache so that other actions using +    // this unbundling action can get the right results. +    for (auto &UI : UA->getDependentActionsInfo()) { +      assert(UI.DependentOffloadKind != Action::OFK_None && +             "Unbundling with no offloading??"); + +      // Unbundling actions are never at the top level. When we generate the +      // offloading prefix, we also do that for the host file because the +      // unbundling action does not change the type of the output which can +      // cause a overwrite. +      std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix( +          UI.DependentOffloadKind, +          UI.DependentToolChain->getTriple().normalize(), +          /*CreatePrefixForHost=*/true); +      auto CurI = InputInfo( +          UA, GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch, +                                 /*AtTopLevel=*/false, MultipleArchs, +                                 OffloadingPrefix), +          BaseInput); +      // Save the unbundling result. +      UnbundlingResults.push_back(CurI); + +      // Get the unique string identifier for this dependence and cache the +      // result. +      CachedResults[{A, GetTriplePlusArchString( +                            UI.DependentToolChain, UI.DependentBoundArch, +                            UI.DependentOffloadKind)}] = CurI; +    } + +    // Now that we have all the results generated, select the one that should be +    // returned for the current depending action. +    std::pair<const Action *, std::string> ActionTC = { +        A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; +    assert(CachedResults.find(ActionTC) != CachedResults.end() && +           "Result does not exist??"); +    Result = CachedResults[ActionTC]; +  } else if (JA->getType() == types::TY_Nothing)      Result = InputInfo(A, BaseInput); -  else +  else { +    // We only have to generate a prefix for the host if this is not a top-level +    // action. +    std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix( +        A->getOffloadingDeviceKind(), TC->getTriple().normalize(), +        /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() && +            !AtTopLevel);      Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch,                                               AtTopLevel, MultipleArchs, -                                             TC->getTriple().normalize()), +                                             OffloadingPrefix),                         BaseInput); +  }    if (CCCPrintBindings && !CCGenDiagnostics) {      llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"' @@ -2265,10 +3308,28 @@ InputInfo Driver::BuildJobsForActionNoCache(        if (i + 1 != e)          llvm::errs() << ", ";      } -    llvm::errs() << "], output: " << Result.getAsString() << "\n"; +    if (UnbundlingResults.empty()) +      llvm::errs() << "], output: " << Result.getAsString() << "\n"; +    else { +      llvm::errs() << "], outputs: ["; +      for (unsigned i = 0, e = UnbundlingResults.size(); i != e; ++i) { +        llvm::errs() << UnbundlingResults[i].getAsString(); +        if (i + 1 != e) +          llvm::errs() << ", "; +      } +      llvm::errs() << "] \n"; +    }    } else { -    T->ConstructJob(C, *JA, Result, InputInfos, -                    C.getArgsForToolChain(TC, BoundArch), LinkingOutput); +    if (UnbundlingResults.empty()) +      T->ConstructJob( +          C, *JA, Result, InputInfos, +          C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), +          LinkingOutput); +    else +      T->ConstructJobMultipleOutputs( +          C, *JA, UnbundlingResults, InputInfos, +          C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), +          LinkingOutput);    }    return Result;  } @@ -2313,9 +3374,9 @@ static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue,  const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,                                         const char *BaseInput, -                                       const char *BoundArch, bool AtTopLevel, +                                       StringRef BoundArch, bool AtTopLevel,                                         bool MultipleArchs, -                                       StringRef NormalizedTriple) const { +                                       StringRef OffloadingPrefix) const {    llvm::PrettyStackTraceString CrashInfo("Computing output path");    // Output to a user requested destination?    if (AtTopLevel && !isa<DsymutilJobAction>(JA) && !isa<VerifyJobAction>(JA)) { @@ -2360,7 +3421,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,      std::pair<StringRef, StringRef> Split = Name.split('.');      std::string TmpName = GetTemporaryPath(          Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode())); -    return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())); +    return C.addTempFile(C.getArgs().MakeArgString(TmpName));    }    SmallString<128> BasePath(BaseInput); @@ -2375,7 +3436,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,    // Determine what the derived output name should be.    const char *NamedOutput; -  if (JA.getType() == types::TY_Object && +  if ((JA.getType() == types::TY_Object || JA.getType() == types::TY_LTO_BC) &&        C.getArgs().hasArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)) {      // The /Fo or /o flag decides the object filename.      StringRef Val = @@ -2399,17 +3460,17 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,        // clang-cl uses BaseName for the executable name.        NamedOutput =            MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image); -    } else if (MultipleArchs && BoundArch) { +    } else {        SmallString<128> Output(getDefaultImageName()); -      Output += JA.getOffloadingFileNamePrefix(NormalizedTriple); -      Output += "-"; -      Output.append(BoundArch); +      Output += OffloadingPrefix; +      if (MultipleArchs && !BoundArch.empty()) { +        Output += "-"; +        Output.append(BoundArch); +      }        NamedOutput = C.getArgs().MakeArgString(Output.c_str()); -    } else { -      NamedOutput = getDefaultImageName();      }    } else if (JA.getType() == types::TY_PCH && IsCLMode()) { -    NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName).c_str()); +    NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName));    } else {      const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());      assert(Suffix && "All types used for output should have a suffix."); @@ -2418,8 +3479,8 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,      if (!types::appendSuffixForType(JA.getType()))        End = BaseName.rfind('.');      SmallString<128> Suffixed(BaseName.substr(0, End)); -    Suffixed += JA.getOffloadingFileNamePrefix(NormalizedTriple); -    if (MultipleArchs && BoundArch) { +    Suffixed += OffloadingPrefix; +    if (MultipleArchs && !BoundArch.empty()) {        Suffixed += "-";        Suffixed.append(BoundArch);      } @@ -2459,7 +3520,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,        std::pair<StringRef, StringRef> Split = Name.split('.');        std::string TmpName = GetTemporaryPath(            Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode())); -      return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())); +      return C.addTempFile(C.getArgs().MakeArgString(TmpName));      }    } @@ -2476,7 +3537,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,    }  } -std::string Driver::GetFilePath(const char *Name, const ToolChain &TC) const { +std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const {    // Respect a limited subset of the '-Bprefix' functionality in GCC by    // attempting to use this prefix when looking for file paths.    for (const std::string &Dir : PrefixDirs) { @@ -2506,16 +3567,16 @@ std::string Driver::GetFilePath(const char *Name, const ToolChain &TC) const {  }  void Driver::generatePrefixedToolNames( -    const char *Tool, const ToolChain &TC, +    StringRef Tool, const ToolChain &TC,      SmallVectorImpl<std::string> &Names) const {    // FIXME: Needs a better variable than DefaultTargetTriple -  Names.emplace_back(DefaultTargetTriple + "-" + Tool); +  Names.emplace_back((DefaultTargetTriple + "-" + Tool).str());    Names.emplace_back(Tool);    // Allow the discovery of tools prefixed with LLVM's default target triple.    std::string LLVMDefaultTargetTriple = llvm::sys::getDefaultTargetTriple();    if (LLVMDefaultTargetTriple != DefaultTargetTriple) -    Names.emplace_back(LLVMDefaultTargetTriple + "-" + Tool); +    Names.emplace_back((LLVMDefaultTargetTriple + "-" + Tool).str());  }  static bool ScanDirForExecutable(SmallString<128> &Dir, @@ -2529,8 +3590,7 @@ static bool ScanDirForExecutable(SmallString<128> &Dir,    return false;  } -std::string Driver::GetProgramPath(const char *Name, -                                   const ToolChain &TC) const { +std::string Driver::GetProgramPath(StringRef Name, const ToolChain &TC) const {    SmallVector<std::string, 2> TargetSpecificExecutables;    generatePrefixedToolNames(Name, TC, TargetSpecificExecutables); @@ -2542,7 +3602,7 @@ std::string Driver::GetProgramPath(const char *Name,        if (ScanDirForExecutable(P, TargetSpecificExecutables))          return P.str();      } else { -      SmallString<128> P(PrefixDir + Name); +      SmallString<128> P((PrefixDir + Name).str());        if (llvm::sys::fs::can_execute(Twine(P)))          return P.str();      } @@ -2564,8 +3624,7 @@ std::string Driver::GetProgramPath(const char *Name,    return Name;  } -std::string Driver::GetTemporaryPath(StringRef Prefix, -                                     const char *Suffix) const { +std::string Driver::GetTemporaryPath(StringRef Prefix, StringRef Suffix) const {    SmallString<128> Path;    std::error_code EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, Path);    if (EC) { @@ -2645,6 +3704,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,      case llvm::Triple::NaCl:        TC = new toolchains::NaClToolChain(*this, Target, Args);        break; +    case llvm::Triple::Fuchsia: +      TC = new toolchains::Fuchsia(*this, Target, Args); +      break;      case llvm::Triple::Solaris:        TC = new toolchains::Solaris(*this, Target, Args);        break; @@ -2673,12 +3735,12 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,          break;        }        break; -    case llvm::Triple::CUDA: -      TC = new toolchains::CudaToolChain(*this, Target, Args); -      break;      case llvm::Triple::PS4:        TC = new toolchains::PS4CPU(*this, Target, Args);        break; +    case llvm::Triple::Contiki: +      TC = new toolchains::Contiki(*this, Target, Args); +      break;      default:        // Of these targets, Hexagon is the only one that might have        // an OS of Linux, in which case it got handled above already. @@ -2686,6 +3748,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,        case llvm::Triple::tce:          TC = new toolchains::TCEToolChain(*this, Target, Args);          break; +      case llvm::Triple::tcele: +        TC = new toolchains::TCELEToolChain(*this, Target, Args); +        break;        case llvm::Triple::hexagon:          TC = new toolchains::HexagonToolChain(*this, Target, Args);          break; @@ -2711,6 +3776,12 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,        }      }    } + +  // Intentionally omitted from the switch above: llvm::Triple::CUDA.  CUDA +  // compiles always need two toolchains, the CUDA toolchain and the host +  // toolchain.  So the only valid way to create a CUDA toolchain is via +  // CreateOffloadingDeviceToolChains. +    return *TC;  } @@ -2733,36 +3804,35 @@ bool Driver::ShouldUseClangCompiler(const JobAction &JA) const {  ///  /// \return True if the entire string was parsed (9.2), or all groups were  /// parsed (10.3.5extrastuff). -bool Driver::GetReleaseVersion(const char *Str, unsigned &Major, -                               unsigned &Minor, unsigned &Micro, -                               bool &HadExtra) { +bool Driver::GetReleaseVersion(StringRef Str, unsigned &Major, unsigned &Minor, +                               unsigned &Micro, bool &HadExtra) {    HadExtra = false;    Major = Minor = Micro = 0; -  if (*Str == '\0') +  if (Str.empty())      return false; -  char *End; -  Major = (unsigned)strtol(Str, &End, 10); -  if (*Str != '\0' && *End == '\0') +  if (Str.consumeInteger(10, Major)) +    return false; +  if (Str.empty())      return true; -  if (*End != '.') +  if (Str[0] != '.')      return false; -  Str = End + 1; -  Minor = (unsigned)strtol(Str, &End, 10); -  if (*Str != '\0' && *End == '\0') +  Str = Str.drop_front(1); + +  if (Str.consumeInteger(10, Minor)) +    return false; +  if (Str.empty())      return true; -  if (*End != '.') +  if (Str[0] != '.')      return false; +  Str = Str.drop_front(1); -  Str = End + 1; -  Micro = (unsigned)strtol(Str, &End, 10); -  if (*Str != '\0' && *End == '\0') -    return true; -  if (Str == End) +  if (Str.consumeInteger(10, Micro))      return false; -  HadExtra = true; +  if (!Str.empty()) +    HadExtra = true;    return true;  } @@ -2772,21 +3842,22 @@ bool Driver::GetReleaseVersion(const char *Str, unsigned &Major,  ///  /// \return True if the entire string was parsed and there are  /// no extra characters remaining at the end. -bool Driver::GetReleaseVersion(const char *Str, +bool Driver::GetReleaseVersion(StringRef Str,                                 MutableArrayRef<unsigned> Digits) { -  if (*Str == '\0') +  if (Str.empty())      return false; -  char *End;    unsigned CurDigit = 0;    while (CurDigit < Digits.size()) { -    unsigned Digit = (unsigned)strtol(Str, &End, 10); +    unsigned Digit; +    if (Str.consumeInteger(10, Digit)) +      return false;      Digits[CurDigit] = Digit; -    if (*Str != '\0' && *End == '\0') +    if (Str.empty())        return true; -    if (*End != '.' || Str == End) +    if (Str[0] != '.')        return false; -    Str = End + 1; +    Str = Str.drop_front(1);      CurDigit++;    }  | 
