1 files changed, 733 insertions, 259 deletions
diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp
index 9ae33b80f8898..1dfcacc75ea5b 100644
--- a/lib/Driver/Driver.cpp
+++ b/lib/Driver/Driver.cpp
@@ -12,6 +12,7 @@
 #include "ToolChains/AMDGPU.h"
 #include "ToolChains/AVR.h"
 #include "ToolChains/Ananas.h"
+#include "ToolChains/BareMetal.h"
 #include "ToolChains/Clang.h"
 #include "ToolChains/CloudABI.h"
 #include "ToolChains/Contiki.h"
@@ -22,15 +23,15 @@
 #include "ToolChains/FreeBSD.h"
 #include "ToolChains/Fuchsia.h"
 #include "ToolChains/Gnu.h"
-#include "ToolChains/BareMetal.h"
+#include "ToolChains/HIP.h"
 #include "ToolChains/Haiku.h"
 #include "ToolChains/Hexagon.h"
 #include "ToolChains/Lanai.h"
 #include "ToolChains/Linux.h"
+#include "ToolChains/MSVC.h"
 #include "ToolChains/MinGW.h"
 #include "ToolChains/Minix.h"
 #include "ToolChains/MipsLinux.h"
-#include "ToolChains/MSVC.h"
 #include "ToolChains/Myriad.h"
 #include "ToolChains/NaCl.h"
 #include "ToolChains/NetBSD.h"
@@ -57,17 +58,20 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/ArgList.h"
 #include "llvm/Option/OptSpecifier.h"
 #include "llvm/Option/OptTable.h"
 #include "llvm/Option/Option.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 #include <map>
@@ -81,19 +85,20 @@ using namespace clang::driver;
 using namespace clang;
 using namespace llvm::opt;
 
-Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,
+Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple,
                DiagnosticsEngine &Diags,
                IntrusiveRefCntPtr<vfs::FileSystem> VFS)
     : Opts(createDriverOptTable()), Diags(Diags), VFS(std::move(VFS)),
       Mode(GCCMode), SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone),
       LTOMode(LTOK_None), ClangExecutable(ClangExecutable),
-      SysRoot(DEFAULT_SYSROOT), 
-      DriverTitle("clang LLVM compiler"), CCPrintOptionsFilename(nullptr),
-      CCPrintHeadersFilename(nullptr), CCLogDiagnosticsFilename(nullptr),
-      CCCPrintBindings(false), CCPrintHeaders(false), CCLogDiagnostics(false),
-      CCGenDiagnostics(false), DefaultTargetTriple(DefaultTargetTriple),
-      CCCGenericGCCName(""), CheckInputsExist(true), CCCUsePCH(true),
-      GenReproducer(false), SuppressMissingInputWarning(false) {
+      SysRoot(DEFAULT_SYSROOT), DriverTitle("clang LLVM compiler"),
+      CCPrintOptionsFilename(nullptr), CCPrintHeadersFilename(nullptr),
+      CCLogDiagnosticsFilename(nullptr), CCCPrintBindings(false),
+      CCPrintOptions(false), CCPrintHeaders(false), CCLogDiagnostics(false),
+      CCGenDiagnostics(false), TargetTriple(TargetTriple),
+      CCCGenericGCCName(""), Saver(Alloc), CheckInputsExist(true),
+      CCCUsePCH(true), GenReproducer(false),
+      SuppressMissingInputWarning(false) {
 
   // Provide a sane fallback if no VFS is specified.
   if (!this->VFS)
@@ -103,6 +108,13 @@ Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,
   Dir = llvm::sys::path::parent_path(ClangExecutable);
   InstalledDir = Dir; // Provide a sensible default installed dir.
 
+#if defined(CLANG_CONFIG_FILE_SYSTEM_DIR)
+  SystemConfigDir = CLANG_CONFIG_FILE_SYSTEM_DIR;
+#endif
+#if defined(CLANG_CONFIG_FILE_USER_DIR)
+  UserConfigDir = CLANG_CONFIG_FILE_USER_DIR;
+#endif
+
   // Compute the path to the resource directory.
   StringRef ClangResourceDir(CLANG_RESOURCE_DIR);
   SmallString<128> P(Dir);
@@ -119,11 +131,12 @@ Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple,
 
 void Driver::ParseDriverMode(StringRef ProgramName,
                              ArrayRef<const char *> Args) {
-  ClangNameParts = ToolChain::getTargetAndModeFromProgramName(ProgramName);
+  if (ClangNameParts.isEmpty())
+    ClangNameParts = ToolChain::getTargetAndModeFromProgramName(ProgramName);
   setDriverModeFromOption(ClangNameParts.DriverMode);
 
   for (const char *ArgPtr : Args) {
-    // Ingore nullptrs, they are response file's EOL markers
+    // Ignore nullptrs, they are the response file's EOL markers.
     if (ArgPtr == nullptr)
       continue;
     const StringRef Arg = ArgPtr;
@@ -138,15 +151,13 @@ void Driver::setDriverModeFromOption(StringRef Opt) {
     return;
   StringRef Value = Opt.drop_front(OptName.size());
 
-  const unsigned M = llvm::StringSwitch<unsigned>(Value)
-                         .Case("gcc", GCCMode)
-                         .Case("g++", GXXMode)
-                         .Case("cpp", CPPMode)
-                         .Case("cl", CLMode)
-                         .Default(~0U);
-
-  if (M != ~0U)
-    Mode = static_cast<DriverMode>(M);
+  if (auto M = llvm::StringSwitch<llvm::Optional<DriverMode>>(Value)
+                   .Case("gcc", GCCMode)
+                   .Case("g++", GXXMode)
+                   .Case("cpp", CPPMode)
+                   .Case("cl", CLMode)
+                   .Default(None))
+    Mode = *M;
   else
     Diag(diag::err_drv_unsupported_option_argument) << OptName << Value;
 }
@@ -178,9 +189,19 @@ InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings,
   // Check for unsupported options.
   for (const Arg *A : Args) {
     if (A->getOption().hasFlag(options::Unsupported)) {
-      Diag(diag::err_drv_unsupported_opt) << A->getAsString(Args);
-      ContainsError |= Diags.getDiagnosticLevel(diag::err_drv_unsupported_opt,
-                                                SourceLocation()) >
+      unsigned DiagID;
+      auto ArgString = A->getAsString(Args);
+      std::string Nearest;
+      if (getOpts().findNearest(
+            ArgString, Nearest, IncludedFlagsBitmask,
+            ExcludedFlagsBitmask | options::Unsupported) > 1) {
+        DiagID = diag::err_drv_unsupported_opt;
+        Diag(DiagID) << ArgString;
+      } else {
+        DiagID = diag::err_drv_unsupported_opt_with_suggestion;
+        Diag(DiagID) << ArgString << Nearest;
+      }
+      ContainsError |= Diags.getDiagnosticLevel(DiagID, SourceLocation()) >
                        DiagnosticsEngine::Warning;
       continue;
     }
@@ -195,11 +216,20 @@ InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings,
   }
 
   for (const Arg *A : Args.filtered(options::OPT_UNKNOWN)) {
-    auto ID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl
-                         : diag::err_drv_unknown_argument;
-
-    Diags.Report(ID) << A->getAsString(Args);
-    ContainsError |= Diags.getDiagnosticLevel(ID, SourceLocation()) >
+    unsigned DiagID;
+    auto ArgString = A->getAsString(Args);
+    std::string Nearest;
+    if (getOpts().findNearest(
+          ArgString, Nearest, IncludedFlagsBitmask, ExcludedFlagsBitmask) > 1) {
+      DiagID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl
+                          : diag::err_drv_unknown_argument;
+      Diags.Report(DiagID) << ArgString;
+    } else {
+      DiagID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl_with_suggestion
+                          : diag::err_drv_unknown_argument_with_suggestion;
+      Diags.Report(DiagID) << ArgString << Nearest;
+    }
+    ContainsError |= Diags.getDiagnosticLevel(DiagID, SourceLocation()) >
                      DiagnosticsEngine::Warning;
   }
 
@@ -256,11 +286,12 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
 }
 
 static Arg *MakeInputArg(DerivedArgList &Args, OptTable &Opts,
-                         StringRef Value) {
+                         StringRef Value, bool Claim = true) {
   Arg *A = new Arg(Opts.getOption(options::OPT_INPUT), Value,
                    Args.getBaseArgs().MakeIndex(Value), Value.data());
   Args.AddSynthesizedArg(A);
-  A->claim();
+  if (Claim)
+    A->claim();
   return A;
 }
 
@@ -328,7 +359,7 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
     if (A->getOption().matches(options::OPT__DASH_DASH)) {
       A->claim();
       for (StringRef Val : A->getValues())
-        DAL->append(MakeInputArg(*DAL, *Opts, Val));
+        DAL->append(MakeInputArg(*DAL, *Opts, Val, false));
       continue;
     }
 
@@ -353,23 +384,23 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const {
   return DAL;
 }
 
-/// \brief Compute target triple from args.
+/// Compute target triple from args.
 ///
 /// This routine provides the logic to compute a target triple from various
 /// args passed to the driver and the default triple string.
 static llvm::Triple computeTargetTriple(const Driver &D,
-                                        StringRef DefaultTargetTriple,
+                                        StringRef TargetTriple,
                                         const ArgList &Args,
                                         StringRef DarwinArchName = "") {
   // FIXME: Already done in Compilation *Driver::BuildCompilation
   if (const Arg *A = Args.getLastArg(options::OPT_target))
-    DefaultTargetTriple = A->getValue();
+    TargetTriple = A->getValue();
 
-  llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple));
+  llvm::Triple Target(llvm::Triple::normalize(TargetTriple));
 
   // Handle Apple-specific options available here.
   if (Target.isOSBinFormatMachO()) {
-    // If an explict Darwin arch name is given, that trumps all.
+    // If an explicit Darwin arch name is given, that trumps all.
     if (!DarwinArchName.empty()) {
       tools::darwin::setTripleTypeForMachOArchName(Target, DarwinArchName);
       return Target;
@@ -452,7 +483,7 @@ static llvm::Triple computeTargetTriple(const Driver &D,
   return Target;
 }
 
-// \brief Parse the LTO options and record the type of LTO compilation
+// Parse the LTO options and record the type of LTO compilation
 // based on which -f(no-)?lto(=.*)? option occurs last.
 void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
   LTOMode = LTOK_None;
@@ -508,24 +539,55 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
                                               InputList &Inputs) {
 
   //
-  // CUDA
+  // CUDA/HIP
   //
-  // We need to generate a CUDA toolchain if any of the inputs has a CUDA type.
-  if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
+  // We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA
+  // or HIP type. However, mixed CUDA/HIP compilation is not supported.
+  bool IsCuda =
+      llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
         return types::isCuda(I.first);
-      })) {
+      });
+  bool IsHIP =
+      llvm::any_of(Inputs,
+                   [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
+                     return types::isHIP(I.first);
+                   }) ||
+      C.getInputArgs().hasArg(options::OPT_hip_link);
+  if (IsCuda && IsHIP) {
+    Diag(clang::diag::err_drv_mix_cuda_hip);
+    return;
+  }
+  if (IsCuda) {
     const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
     const llvm::Triple &HostTriple = HostTC->getTriple();
-    llvm::Triple CudaTriple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda"
-                                                     : "nvptx-nvidia-cuda");
-    // Use the CUDA and host triples as the key into the ToolChains map, because
-    // the device toolchain we create depends on both.
+    StringRef DeviceTripleStr;
+    auto OFK = Action::OFK_Cuda;
+    DeviceTripleStr =
+        HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda";
+    llvm::Triple CudaTriple(DeviceTripleStr);
+    // Use the CUDA and host triples as the key into the ToolChains map,
+    // because the device toolchain we create depends on both.
     auto &CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()];
     if (!CudaTC) {
       CudaTC = llvm::make_unique<toolchains::CudaToolChain>(
-          *this, CudaTriple, *HostTC, C.getInputArgs(), Action::OFK_Cuda);
+          *this, CudaTriple, *HostTC, C.getInputArgs(), OFK);
+    }
+    C.addOffloadDeviceToolChain(CudaTC.get(), OFK);
+  } else if (IsHIP) {
+    const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
+    const llvm::Triple &HostTriple = HostTC->getTriple();
+    StringRef DeviceTripleStr;
+    auto OFK = Action::OFK_HIP;
+    DeviceTripleStr = "amdgcn-amd-amdhsa";
+    llvm::Triple HIPTriple(DeviceTripleStr);
+    // Use the HIP and host triples as the key into the ToolChains map,
+    // because the device toolchain we create depends on both.
+    auto &HIPTC = ToolChains[HIPTriple.str() + "/" + HostTriple.str()];
+    if (!HIPTC) {
+      HIPTC = llvm::make_unique<toolchains::HIPToolChain>(
+          *this, HIPTriple, *HostTC, C.getInputArgs());
     }
-    C.addOffloadDeviceToolChain(CudaTC.get(), Action::OFK_Cuda);
+    C.addOffloadDeviceToolChain(HIPTC.get(), OFK);
   }
 
   //
@@ -600,6 +662,216 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
   //
 }
 
+/// Looks the given directories for the specified file.
+///
+/// \param[out] FilePath File path, if the file was found.
+/// \param[in]  Dirs Directories used for the search.
+/// \param[in]  FileName Name of the file to search for.
+/// \return True if file was found.
+///
+/// Looks for file specified by FileName sequentially in directories specified
+/// by Dirs.
+///
+static bool searchForFile(SmallVectorImpl<char> &FilePath,
+                          ArrayRef<std::string> Dirs,
+                          StringRef FileName) {
+  SmallString<128> WPath;
+  for (const StringRef &Dir : Dirs) {
+    if (Dir.empty())
+      continue;
+    WPath.clear();
+    llvm::sys::path::append(WPath, Dir, FileName);
+    llvm::sys::path::native(WPath);
+    if (llvm::sys::fs::is_regular_file(WPath)) {
+      FilePath = std::move(WPath);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Driver::readConfigFile(StringRef FileName) {
+  // Try reading the given file.
+  SmallVector<const char *, 32> NewCfgArgs;
+  if (!llvm::cl::readConfigFile(FileName, Saver, NewCfgArgs)) {
+    Diag(diag::err_drv_cannot_read_config_file) << FileName;
+    return true;
+  }
+
+  // Read options from config file.
+  llvm::SmallString<128> CfgFileName(FileName);
+  llvm::sys::path::native(CfgFileName);
+  ConfigFile = CfgFileName.str();
+  bool ContainErrors;
+  CfgOptions = llvm::make_unique<InputArgList>(
+      ParseArgStrings(NewCfgArgs, ContainErrors));
+  if (ContainErrors) {
+    CfgOptions.reset();
+    return true;
+  }
+
+  if (CfgOptions->hasArg(options::OPT_config)) {
+    CfgOptions.reset();
+    Diag(diag::err_drv_nested_config_file);
+    return true;
+  }
+
+  // Claim all arguments that come from a configuration file so that the driver
+  // does not warn on any that is unused.
+  for (Arg *A : *CfgOptions)
+    A->claim();
+  return false;
+}
+
+bool Driver::loadConfigFile() {
+  std::string CfgFileName;
+  bool FileSpecifiedExplicitly = false;
+
+  // Process options that change search path for config files.
+  if (CLOptions) {
+    if (CLOptions->hasArg(options::OPT_config_system_dir_EQ)) {
+      SmallString<128> CfgDir;
+      CfgDir.append(
+          CLOptions->getLastArgValue(options::OPT_config_system_dir_EQ));
+      if (!CfgDir.empty()) {
+        if (llvm::sys::fs::make_absolute(CfgDir).value() != 0)
+          SystemConfigDir.clear();
+        else
+          SystemConfigDir = std::string(CfgDir.begin(), CfgDir.end());
+      }
+    }
+    if (CLOptions->hasArg(options::OPT_config_user_dir_EQ)) {
+      SmallString<128> CfgDir;
+      CfgDir.append(
+          CLOptions->getLastArgValue(options::OPT_config_user_dir_EQ));
+      if (!CfgDir.empty()) {
+        if (llvm::sys::fs::make_absolute(CfgDir).value() != 0)
+          UserConfigDir.clear();
+        else
+          UserConfigDir = std::string(CfgDir.begin(), CfgDir.end());
+      }
+    }
+  }
+
+  // First try to find config file specified in command line.
+  if (CLOptions) {
+    std::vector<std::string> ConfigFiles =
+        CLOptions->getAllArgValues(options::OPT_config);
+    if (ConfigFiles.size() > 1) {
+      Diag(diag::err_drv_duplicate_config);
+      return true;
+    }
+
+    if (!ConfigFiles.empty()) {
+      CfgFileName = ConfigFiles.front();
+      assert(!CfgFileName.empty());
+
+      // If argument contains directory separator, treat it as a path to
+      // configuration file.
+      if (llvm::sys::path::has_parent_path(CfgFileName)) {
+        SmallString<128> CfgFilePath;
+        if (llvm::sys::path::is_relative(CfgFileName))
+          llvm::sys::fs::current_path(CfgFilePath);
+        llvm::sys::path::append(CfgFilePath, CfgFileName);
+        if (!llvm::sys::fs::is_regular_file(CfgFilePath)) {
+          Diag(diag::err_drv_config_file_not_exist) << CfgFilePath;
+          return true;
+        }
+        return readConfigFile(CfgFilePath);
+      }
+
+      FileSpecifiedExplicitly = true;
+    }
+  }
+
+  // If config file is not specified explicitly, try to deduce configuration
+  // from executable name. For instance, an executable 'armv7l-clang' will
+  // search for config file 'armv7l-clang.cfg'.
+  if (CfgFileName.empty() && !ClangNameParts.TargetPrefix.empty())
+    CfgFileName = ClangNameParts.TargetPrefix + '-' + ClangNameParts.ModeSuffix;
+
+  if (CfgFileName.empty())
+    return false;
+
+  // Determine architecture part of the file name, if it is present.
+  StringRef CfgFileArch = CfgFileName;
+  size_t ArchPrefixLen = CfgFileArch.find('-');
+  if (ArchPrefixLen == StringRef::npos)
+    ArchPrefixLen = CfgFileArch.size();
+  llvm::Triple CfgTriple;
+  CfgFileArch = CfgFileArch.take_front(ArchPrefixLen);
+  CfgTriple = llvm::Triple(llvm::Triple::normalize(CfgFileArch));
+  if (CfgTriple.getArch() == llvm::Triple::ArchType::UnknownArch)
+    ArchPrefixLen = 0;
+
+  if (!StringRef(CfgFileName).endswith(".cfg"))
+    CfgFileName += ".cfg";
+
+  // If config file starts with architecture name and command line options
+  // redefine architecture (with options like -m32 -LE etc), try finding new
+  // config file with that architecture.
+  SmallString<128> FixedConfigFile;
+  size_t FixedArchPrefixLen = 0;
+  if (ArchPrefixLen) {
+    // Get architecture name from config file name like 'i386.cfg' or
+    // 'armv7l-clang.cfg'.
+    // Check if command line options changes effective triple.
+    llvm::Triple EffectiveTriple = computeTargetTriple(*this,
+                                             CfgTriple.getTriple(), *CLOptions);
+    if (CfgTriple.getArch() != EffectiveTriple.getArch()) {
+      FixedConfigFile = EffectiveTriple.getArchName();
+      FixedArchPrefixLen = FixedConfigFile.size();
+      // Append the rest of original file name so that file name transforms
+      // like: i386-clang.cfg -> x86_64-clang.cfg.
+      if (ArchPrefixLen < CfgFileName.size())
+        FixedConfigFile += CfgFileName.substr(ArchPrefixLen);
+    }
+  }
+
+  // Prepare list of directories where config file is searched for.
+  SmallVector<std::string, 3> CfgFileSearchDirs;
+  CfgFileSearchDirs.push_back(UserConfigDir);
+  CfgFileSearchDirs.push_back(SystemConfigDir);
+  CfgFileSearchDirs.push_back(Dir);
+
+  // Try to find config file. First try file with corrected architecture.
+  llvm::SmallString<128> CfgFilePath;
+  if (!FixedConfigFile.empty()) {
+    if (searchForFile(CfgFilePath, CfgFileSearchDirs, FixedConfigFile))
+      return readConfigFile(CfgFilePath);
+    // If 'x86_64-clang.cfg' was not found, try 'x86_64.cfg'.
+    FixedConfigFile.resize(FixedArchPrefixLen);
+    FixedConfigFile.append(".cfg");
+    if (searchForFile(CfgFilePath, CfgFileSearchDirs, FixedConfigFile))
+      return readConfigFile(CfgFilePath);
+  }
+
+  // Then try original file name.
+  if (searchForFile(CfgFilePath, CfgFileSearchDirs, CfgFileName))
+    return readConfigFile(CfgFilePath);
+
+  // Finally try removing driver mode part: 'x86_64-clang.cfg' -> 'x86_64.cfg'.
+  if (!ClangNameParts.ModeSuffix.empty() &&
+      !ClangNameParts.TargetPrefix.empty()) {
+    CfgFileName.assign(ClangNameParts.TargetPrefix);
+    CfgFileName.append(".cfg");
+    if (searchForFile(CfgFilePath, CfgFileSearchDirs, CfgFileName))
+      return readConfigFile(CfgFilePath);
+  }
+
+  // Report error but only if config file was specified explicitly, by option
+  // --config. If it was deduced from executable name, it is not an error.
+  if (FileSpecifiedExplicitly) {
+    Diag(diag::err_drv_config_file_not_found) << CfgFileName;
+    for (const std::string &SearchDir : CfgFileSearchDirs)
+      if (!SearchDir.empty())
+        Diag(diag::note_drv_config_file_searched_in) << SearchDir;
+    return true;
+  }
+
+  return false;
+}
+
 Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   llvm::PrettyStackTraceString CrashInfo("Compilation construction");
 
@@ -623,12 +895,38 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
 
   // FIXME: What are we going to do with -V and -b?
 
+  // Arguments specified in command line.
+  bool ContainsError;
+  CLOptions = llvm::make_unique<InputArgList>(
+      ParseArgStrings(ArgList.slice(1), ContainsError));
+
+  // Try parsing configuration file.
+  if (!ContainsError)
+    ContainsError = loadConfigFile();
+  bool HasConfigFile = !ContainsError && (CfgOptions.get() != nullptr);
+
+  // All arguments, from both config file and command line.
+  InputArgList Args = std::move(HasConfigFile ? std::move(*CfgOptions)
+                                              : std::move(*CLOptions));
+  if (HasConfigFile)
+    for (auto *Opt : *CLOptions) {
+      if (Opt->getOption().matches(options::OPT_config))
+        continue;
+      unsigned Index = Args.MakeIndex(Opt->getSpelling());
+      const Arg *BaseArg = &Opt->getBaseArg();
+      if (BaseArg == Opt)
+        BaseArg = nullptr;
+      Arg *Copy = new llvm::opt::Arg(Opt->getOption(), Opt->getSpelling(),
+                                     Index, BaseArg);
+      Copy->getValues() = Opt->getValues();
+      if (Opt->isClaimed())
+        Copy->claim();
+      Args.append(Copy);
+    }
+
   // FIXME: This stuff needs to go into the Compilation, not the driver.
   bool CCCPrintPhases;
 
-  bool ContainsError;
-  InputArgList Args = ParseArgStrings(ArgList.slice(1), ContainsError);
-
   // Silence driver warnings if requested
   Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w));
 
@@ -653,19 +951,19 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
   GenReproducer = Args.hasFlag(options::OPT_gen_reproducer,
                                options::OPT_fno_crash_diagnostics,
                                !!::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"));
-  // FIXME: DefaultTargetTriple is used by the target-prefixed calls to as/ld
+  // FIXME: TargetTriple is used by the target-prefixed calls to as/ld
   // and getToolChain is const.
   if (IsCLMode()) {
     // clang-cl targets MSVC-style Win32.
-    llvm::Triple T(DefaultTargetTriple);
+    llvm::Triple T(TargetTriple);
     T.setOS(llvm::Triple::Win32);
     T.setVendor(llvm::Triple::PC);
     T.setEnvironment(llvm::Triple::MSVC);
     T.setObjectFormat(llvm::Triple::COFF);
-    DefaultTargetTriple = T.str();
+    TargetTriple = T.str();
   }
   if (const Arg *A = Args.getLastArg(options::OPT_target))
-    DefaultTargetTriple = A->getValue();
+    TargetTriple = A->getValue();
   if (const Arg *A = Args.getLastArg(options::OPT_ccc_install_dir))
     Dir = InstalledDir = A->getValue();
   for (const Arg *A : Args.filtered(options::OPT_B)) {
@@ -713,7 +1011,7 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) {
 
   // Owned by the host.
   const ToolChain &TC = getToolChain(
-      *UArgs, computeTargetTriple(*this, DefaultTargetTriple, *UArgs));
+      *UArgs, computeTargetTriple(*this, TargetTriple, *UArgs));
 
   // The compilation takes ownership of Args.
   Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs,
@@ -851,8 +1149,9 @@ bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename,
 // When clang crashes, produce diagnostic information including the fully
 // preprocessed source file(s).  Request that the developer attach the
 // diagnostic information to a bug report.
-void Driver::generateCompilationDiagnostics(Compilation &C,
-                                            const Command &FailingCommand) {
+void Driver::generateCompilationDiagnostics(
+    Compilation &C, const Command &FailingCommand,
+    StringRef AdditionalInformation, CompilationDiagnosticReport *Report) {
   if (C.getArgs().hasArg(options::OPT_fno_crash_diagnostics))
     return;
 
@@ -954,9 +1253,6 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
 
   // If any of the preprocessing commands failed, clean up and exit.
   if (!FailingCommands.empty()) {
-    if (!isSaveTempsEnabled())
-      C.CleanupFileList(C.getTempFiles(), true);
-
     Diag(clang::diag::note_drv_command_failed_diag_msg)
         << "Error generating preprocessed source(s).";
     return;
@@ -978,6 +1274,8 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
   SmallString<128> ReproCrashFilename;
   for (const char *TempFile : TempFiles) {
     Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile;
+    if (Report)
+      Report->TemporaryFiles.push_back(TempFile);
     if (ReproCrashFilename.empty()) {
       ReproCrashFilename = TempFile;
       llvm::sys::path::replace_extension(ReproCrashFilename, ".crash");
@@ -993,12 +1291,13 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
   // Assume associated files are based off of the first temporary file.
   CrashReportInfo CrashInfo(TempFiles[0], VFS);
 
-  std::string Script = CrashInfo.Filename.rsplit('.').first.str() + ".sh";
+  llvm::SmallString<128> Script(CrashInfo.Filename);
+  llvm::sys::path::replace_extension(Script, "sh");
   std::error_code EC;
-  llvm::raw_fd_ostream ScriptOS(Script, EC, llvm::sys::fs::F_Excl);
+  llvm::raw_fd_ostream ScriptOS(Script, EC, llvm::sys::fs::CD_CreateNew);
   if (EC) {
     Diag(clang::diag::note_drv_command_failed_diag_msg)
-        << "Error generating run script: " + Script + " " + EC.message();
+        << "Error generating run script: " << Script << " " << EC.message();
   } else {
     ScriptOS << "# Crash reproducer for " << getClangFullVersion() << "\n"
              << "# Driver args: ";
@@ -1006,6 +1305,11 @@ void Driver::generateCompilationDiagnostics(Compilation &C,
     ScriptOS << "# Original command: ";
     Cmd.Print(ScriptOS, "\n", /*Quote=*/true);
     Cmd.Print(ScriptOS, "\n", /*Quote=*/true, &CrashInfo);
+    if (!AdditionalInformation.empty())
+      ScriptOS << "\n# Additional information: " << AdditionalInformation
+               << "\n";
+    if (Report)
+      Report->TemporaryFiles.push_back(Script.str());
     Diag(clang::diag::note_drv_command_failed_diag_msg) << Script;
   }
 
@@ -1066,9 +1370,6 @@ int Driver::ExecuteCompilation(
 
   C.ExecuteJobs(C.getJobs(), FailingCommands);
 
-  // Remove temp files.
-  C.CleanupFileList(C.getTempFiles());
-
   // If the command succeeded, we are done.
   if (FailingCommands.empty())
     return 0;
@@ -1144,6 +1445,10 @@ void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const {
 
   // Print out the install directory.
   OS << "InstalledDir: " << InstalledDir << '\n';
+
+  // If configuration file was used, print its path.
+  if (!ConfigFile.empty())
+    OS << "Configuration file: " << ConfigFile << '\n';
 }
 
 /// PrintDiagnosticCategories - Implement the --print-diagnostic-categories
@@ -1155,53 +1460,66 @@ static void PrintDiagnosticCategories(raw_ostream &OS) {
     OS << i << ',' << DiagnosticIDs::getCategoryNameFromID(i) << '\n';
 }
 
-void Driver::handleAutocompletions(StringRef PassedFlags) const {
+void Driver::HandleAutocompletions(StringRef PassedFlags) const {
+  if (PassedFlags == "")
+    return;
   // Print out all options that start with a given argument. This is used for
   // shell autocompletion.
   std::vector<std::string> SuggestedCompletions;
+  std::vector<std::string> Flags;
 
   unsigned short DisableFlags =
       options::NoDriverOption | options::Unsupported | options::Ignored;
-  // We want to show cc1-only options only when clang is invoked as "clang
-  // -cc1". When clang is invoked as "clang -cc1", we add "#" to the beginning
-  // of an --autocomplete  option so that the clang driver can distinguish
-  // whether it is requested to show cc1-only options or not.
-  if (PassedFlags.size() > 0 && PassedFlags[0] == '#') {
+
+  // Parse PassedFlags by "," as all the command-line flags are passed to this
+  // function separated by ","
+  StringRef TargetFlags = PassedFlags;
+  while (TargetFlags != "") {
+    StringRef CurFlag;
+    std::tie(CurFlag, TargetFlags) = TargetFlags.split(",");
+    Flags.push_back(std::string(CurFlag));
+  }
+
+  // We want to show cc1-only options only when clang is invoked with -cc1 or
+  // -Xclang.
+  if (std::find(Flags.begin(), Flags.end(), "-Xclang") != Flags.end() ||
+      std::find(Flags.begin(), Flags.end(), "-cc1") != Flags.end())
     DisableFlags &= ~options::NoDriverOption;
-    PassedFlags = PassedFlags.substr(1);
+
+  StringRef Cur;
+  Cur = Flags.at(Flags.size() - 1);
+  StringRef Prev;
+  if (Flags.size() >= 2) {
+    Prev = Flags.at(Flags.size() - 2);
+    SuggestedCompletions = Opts->suggestValueCompletions(Prev, Cur);
   }
 
-  if (PassedFlags.find(',') == StringRef::npos) {
+  if (SuggestedCompletions.empty())
+    SuggestedCompletions = Opts->suggestValueCompletions(Cur, "");
+
+  if (SuggestedCompletions.empty()) {
     // If the flag is in the form of "--autocomplete=-foo",
     // we were requested to print out all option names that start with "-foo".
     // For example, "--autocomplete=-fsyn" is expanded to "-fsyntax-only".
-    SuggestedCompletions = Opts->findByPrefix(PassedFlags, DisableFlags);
+    SuggestedCompletions = Opts->findByPrefix(Cur, DisableFlags);
 
     // We have to query the -W flags manually as they're not in the OptTable.
     // TODO: Find a good way to add them to OptTable instead and them remove
     // this code.
     for (StringRef S : DiagnosticIDs::getDiagnosticFlags())
-      if (S.startswith(PassedFlags))
+      if (S.startswith(Cur))
         SuggestedCompletions.push_back(S);
-  } else {
-    // If the flag is in the form of "--autocomplete=foo,bar", we were
-    // requested to print out all option values for "-foo" that start with
-    // "bar". For example,
-    // "--autocomplete=-stdlib=,l" is expanded to "libc++" and "libstdc++".
-    StringRef Option, Arg;
-    std::tie(Option, Arg) = PassedFlags.split(',');
-    SuggestedCompletions = Opts->suggestValueCompletions(Option, Arg);
   }
 
   // Sort the autocomplete candidates so that shells print them out in a
   // deterministic order. We could sort in any way, but we chose
   // case-insensitive sorting for consistency with the -help option
   // which prints out options in the case-insensitive alphabetical order.
-  std::sort(SuggestedCompletions.begin(), SuggestedCompletions.end(),
-            [](StringRef A, StringRef B) {
-              if (int X = A.compare_lower(B))
-                return X < 0;
-              return A.compare(B) > 0;
+  llvm::sort(SuggestedCompletions.begin(), SuggestedCompletions.end(),
+             [](StringRef A, StringRef B) {
+               if (int X = A.compare_lower(B))
+                 return X < 0;
+               return A.compare(B) > 0;
             });
 
   llvm::outs() << llvm::join(SuggestedCompletions, "\n") << '\n';
@@ -1250,6 +1568,15 @@ bool Driver::HandleImmediateArgs(const Compilation &C) {
     SuppressMissingInputWarning = true;
   }
 
+  if (C.getArgs().hasArg(options::OPT_v)) {
+    if (!SystemConfigDir.empty())
+      llvm::errs() << "System configuration file directory: "
+                   << SystemConfigDir << "\n";
+    if (!UserConfigDir.empty())
+      llvm::errs() << "User configuration file directory: "
+                   << UserConfigDir << "\n";
+  }
+
   const ToolChain &TC = C.getDefaultToolChain();
 
   if (C.getArgs().hasArg(options::OPT_v))
@@ -1295,13 +1622,19 @@ bool Driver::HandleImmediateArgs(const Compilation &C) {
   }
 
   if (Arg *A = C.getArgs().getLastArg(options::OPT_print_prog_name_EQ)) {
-    llvm::outs() << GetProgramPath(A->getValue(), TC) << "\n";
+    StringRef ProgName = A->getValue();
+
+    // Null program name cannot have a path.
+    if (! ProgName.empty())
+      llvm::outs() << GetProgramPath(ProgName, TC);
+
+    llvm::outs() << "\n";
     return false;
   }
 
   if (Arg *A = C.getArgs().getLastArg(options::OPT_autocomplete)) {
     StringRef PassedFlags = A->getValue();
-    handleAutocompletions(PassedFlags);
+    HandleAutocompletions(PassedFlags);
     return false;
   }
 
@@ -1427,7 +1760,7 @@ void Driver::PrintActions(const Compilation &C) const {
     PrintActions1(C, A, Ids);
 }
 
-/// \brief Check whether the given input tree contains any compilation or
+/// Check whether the given input tree contains any compilation or
 /// assembly actions.
 static bool ContainsCompileOrAssembleAction(const Action *A) {
   if (isa<CompileJobAction>(A) || isa<BackendJobAction>(A) ||
@@ -1528,7 +1861,7 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC,
   }
 }
 
-/// \brief Check that the file referenced by Value exists. If it doesn't,
+/// Check that the file referenced by Value exists. If it doesn't,
 /// issue a diagnostic and return false.
 static bool DiagnoseInputExistence(const Driver &D, const DerivedArgList &Args,
                                    StringRef Value, types::ID Ty) {
@@ -1823,9 +2156,10 @@ class OffloadingActionBuilder final {
     }
   };
 
-  /// \brief CUDA action builder. It injects device code in the host backend
-  /// action.
-  class CudaActionBuilder final : public DeviceActionBuilder {
+  /// Base class for CUDA/HIP action builder. It injects device code in
+  /// the host backend action.
+  class CudaActionBuilderBase : public DeviceActionBuilder {
+  protected:
     /// Flags to signal if the user requested host-only or device-only
     /// compilation.
     bool CompileHostOnly = false;
@@ -1842,115 +2176,11 @@ class OffloadingActionBuilder final {
 
     /// Flag that is set to true if this builder acted on the current input.
     bool IsActive = false;
-
   public:
-    CudaActionBuilder(Compilation &C, DerivedArgList &Args,
-                      const Driver::InputList &Inputs)
-        : DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {}
-
-    ActionBuilderReturnCode
-    getDeviceDependences(OffloadAction::DeviceDependences &DA,
-                         phases::ID CurPhase, phases::ID FinalPhase,
-                         PhasesTy &Phases) override {
-      if (!IsActive)
-        return ABRT_Inactive;
-
-      // If we don't have more CUDA actions, we don't have any dependences to
-      // create for the host.
-      if (CudaDeviceActions.empty())
-        return ABRT_Success;
-
-      assert(CudaDeviceActions.size() == GpuArchList.size() &&
-             "Expecting one action per GPU architecture.");
-      assert(!CompileHostOnly &&
-             "Not expecting CUDA actions in host-only compilation.");
-
-      // If we are generating code for the device or we are in a backend phase,
-      // we attempt to generate the fat binary. We compile each arch to ptx and
-      // assemble to cubin, then feed the cubin *and* the ptx into a device
-      // "link" action, which uses fatbinary to combine these cubins into one
-      // fatbin.  The fatbin is then an input to the host action if not in
-      // device-only mode.
-      if (CompileDeviceOnly || CurPhase == phases::Backend) {
-        ActionList DeviceActions;
-        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
-          // Produce the device action from the current phase up to the assemble
-          // phase.
-          for (auto Ph : Phases) {
-            // Skip the phases that were already dealt with.
-            if (Ph < CurPhase)
-              continue;
-            // We have to be consistent with the host final phase.
-            if (Ph > FinalPhase)
-              break;
-
-            CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
-                C, Args, Ph, CudaDeviceActions[I]);
-
-            if (Ph == phases::Assemble)
-              break;
-          }
-
-          // If we didn't reach the assemble phase, we can't generate the fat
-          // binary. We don't need to generate the fat binary if we are not in
-          // device-only mode.
-          if (!isa<AssembleJobAction>(CudaDeviceActions[I]) ||
-              CompileDeviceOnly)
-            continue;
-
-          Action *AssembleAction = CudaDeviceActions[I];
-          assert(AssembleAction->getType() == types::TY_Object);
-          assert(AssembleAction->getInputs().size() == 1);
-
-          Action *BackendAction = AssembleAction->getInputs()[0];
-          assert(BackendAction->getType() == types::TY_PP_Asm);
-
-          for (auto &A : {AssembleAction, BackendAction}) {
-            OffloadAction::DeviceDependences DDep;
-            DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
-                     Action::OFK_Cuda);
-            DeviceActions.push_back(
-                C.MakeAction<OffloadAction>(DDep, A->getType()));
-          }
-        }
-
-        // We generate the fat binary if we have device input actions.
-        if (!DeviceActions.empty()) {
-          CudaFatBinary =
-              C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
-
-          if (!CompileDeviceOnly) {
-            DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
-                   Action::OFK_Cuda);
-            // Clear the fat binary, it is already a dependence to an host
-            // action.
-            CudaFatBinary = nullptr;
-          }
-
-          // Remove the CUDA actions as they are already connected to an host
-          // action or fat binary.
-          CudaDeviceActions.clear();
-        }
-
-        // We avoid creating host action in device-only mode.
-        return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
-      } else if (CurPhase > phases::Backend) {
-        // If we are past the backend phase and still have a device action, we
-        // don't have to do anything as this action is already a device
-        // top-level action.
-        return ABRT_Success;
-      }
-
-      assert(CurPhase < phases::Backend && "Generating single CUDA "
-                                           "instructions should only occur "
-                                           "before the backend phase!");
-
-      // By default, we produce an action for each device arch.
-      for (Action *&A : CudaDeviceActions)
-        A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
-
-      return ABRT_Success;
-    }
+    CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
+                          const Driver::InputList &Inputs,
+                          Action::OffloadKind OFKind)
+        : DeviceActionBuilder(C, Args, Inputs, OFKind) {}
 
     ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
       // While generating code for CUDA, we only depend on the host input action
@@ -1963,9 +2193,10 @@ class OffloadingActionBuilder final {
         assert(!GpuArchList.empty() &&
                "We should have at least one GPU architecture.");
 
-        // If the host input is not CUDA, we don't need to bother about this
-        // input.
-        if (IA->getType() != types::TY_CUDA) {
+        // If the host input is not CUDA or HIP, we don't need to bother about
+        // this input.
+        if (IA->getType() != types::TY_CUDA &&
+            IA->getType() != types::TY_HIP) {
           // The builder will ignore this input.
           IsActive = false;
           return ABRT_Inactive;
@@ -1978,10 +2209,24 @@ class OffloadingActionBuilder final {
           return ABRT_Success;
 
         // Replicate inputs for each GPU architecture.
-        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
-          CudaDeviceActions.push_back(C.MakeAction<InputAction>(
-              IA->getInputArg(), types::TY_CUDA_DEVICE));
+        auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE
+                                                 : types::TY_CUDA_DEVICE;
+        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+          CudaDeviceActions.push_back(
+              C.MakeAction<InputAction>(IA->getInputArg(), Ty));
+        }
+
+        return ABRT_Success;
+      }
 
+      // If this is an unbundling action use it as is for each CUDA toolchain.
+      if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
+        CudaDeviceActions.clear();
+        for (auto Arch : GpuArchList) {
+          CudaDeviceActions.push_back(UA);
+          UA->registerDependentActionInfo(ToolChains[0], CudaArchToString(Arch),
+                                          AssociatedOffloadKind);
+        }
         return ABRT_Success;
       }
 
@@ -1993,7 +2238,7 @@ class OffloadingActionBuilder final {
       auto AddTopLevel = [&](Action *A, CudaArch BoundArch) {
         OffloadAction::DeviceDependences Dep;
         Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch),
-                Action::OFK_Cuda);
+                AssociatedOffloadKind);
         AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
       };
 
@@ -2022,21 +2267,35 @@ class OffloadingActionBuilder final {
     }
 
     bool initialize() override {
+      assert(AssociatedOffloadKind == Action::OFK_Cuda ||
+             AssociatedOffloadKind == Action::OFK_HIP);
+
       // We don't need to support CUDA.
-      if (!C.hasOffloadToolChain<Action::OFK_Cuda>())
+      if (AssociatedOffloadKind == Action::OFK_Cuda &&
+          !C.hasOffloadToolChain<Action::OFK_Cuda>())
+        return false;
+
+      // We don't need to support HIP.
+      if (AssociatedOffloadKind == Action::OFK_HIP &&
+          !C.hasOffloadToolChain<Action::OFK_HIP>())
         return false;
 
       const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
       assert(HostTC && "No toolchain for host compilation.");
-      if (HostTC->getTriple().isNVPTX()) {
-        // We do not support targeting NVPTX for host compilation. Throw
+      if (HostTC->getTriple().isNVPTX() ||
+          HostTC->getTriple().getArch() == llvm::Triple::amdgcn) {
+        // We do not support targeting NVPTX/AMDGCN for host compilation. Throw
         // an error and abort pipeline construction early so we don't trip
         // asserts that assume device-side compilation.
-        C.getDriver().Diag(diag::err_drv_cuda_nvptx_host);
+        C.getDriver().Diag(diag::err_drv_cuda_host_arch)
+            << HostTC->getTriple().getArchName();
         return true;
       }
 
-      ToolChains.push_back(C.getSingleOffloadToolChain<Action::OFK_Cuda>());
+      ToolChains.push_back(
+          AssociatedOffloadKind == Action::OFK_Cuda
+              ? C.getSingleOffloadToolChain<Action::OFK_Cuda>()
+              : C.getSingleOffloadToolChain<Action::OFK_HIP>());
 
       Arg *PartialCompilationArg = Args.getLastArg(
           options::OPT_cuda_host_only, options::OPT_cuda_device_only,
@@ -2089,6 +2348,187 @@ class OffloadingActionBuilder final {
     }
   };
 
+  /// \brief CUDA action builder. It injects device code in the host backend
+  /// action.
+  class CudaActionBuilder final : public CudaActionBuilderBase {
+  public:
+    CudaActionBuilder(Compilation &C, DerivedArgList &Args,
+                      const Driver::InputList &Inputs)
+        : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) {}
+
+    ActionBuilderReturnCode
+    getDeviceDependences(OffloadAction::DeviceDependences &DA,
+                         phases::ID CurPhase, phases::ID FinalPhase,
+                         PhasesTy &Phases) override {
+      if (!IsActive)
+        return ABRT_Inactive;
+
+      // If we don't have more CUDA actions, we don't have any dependences to
+      // create for the host.
+      if (CudaDeviceActions.empty())
+        return ABRT_Success;
+
+      assert(CudaDeviceActions.size() == GpuArchList.size() &&
+             "Expecting one action per GPU architecture.");
+      assert(!CompileHostOnly &&
+             "Not expecting CUDA actions in host-only compilation.");
+
+      // If we are generating code for the device or we are in a backend phase,
+      // we attempt to generate the fat binary. We compile each arch to ptx and
+      // assemble to cubin, then feed the cubin *and* the ptx into a device
+      // "link" action, which uses fatbinary to combine these cubins into one
+      // fatbin.  The fatbin is then an input to the host action if not in
+      // device-only mode.
+      if (CompileDeviceOnly || CurPhase == phases::Backend) {
+        ActionList DeviceActions;
+        for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) {
+          // Produce the device action from the current phase up to the assemble
+          // phase.
+          for (auto Ph : Phases) {
+            // Skip the phases that were already dealt with.
+            if (Ph < CurPhase)
+              continue;
+            // We have to be consistent with the host final phase.
+            if (Ph > FinalPhase)
+              break;
+
+            CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction(
+                C, Args, Ph, CudaDeviceActions[I], Action::OFK_Cuda);
+
+            if (Ph == phases::Assemble)
+              break;
+          }
+
+          // If we didn't reach the assemble phase, we can't generate the fat
+          // binary. We don't need to generate the fat binary if we are not in
+          // device-only mode.
+          if (!isa<AssembleJobAction>(CudaDeviceActions[I]) ||
+              CompileDeviceOnly)
+            continue;
+
+          Action *AssembleAction = CudaDeviceActions[I];
+          assert(AssembleAction->getType() == types::TY_Object);
+          assert(AssembleAction->getInputs().size() == 1);
+
+          Action *BackendAction = AssembleAction->getInputs()[0];
+          assert(BackendAction->getType() == types::TY_PP_Asm);
+
+          for (auto &A : {AssembleAction, BackendAction}) {
+            OffloadAction::DeviceDependences DDep;
+            DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]),
+                     Action::OFK_Cuda);
+            DeviceActions.push_back(
+                C.MakeAction<OffloadAction>(DDep, A->getType()));
+          }
+        }
+
+        // We generate the fat binary if we have device input actions.
+        if (!DeviceActions.empty()) {
+          CudaFatBinary =
+              C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN);
+
+          if (!CompileDeviceOnly) {
+            DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr,
+                   Action::OFK_Cuda);
+            // Clear the fat binary, it is already a dependence to an host
+            // action.
+            CudaFatBinary = nullptr;
+          }
+
+          // Remove the CUDA actions as they are already connected to an host
+          // action or fat binary.
+          CudaDeviceActions.clear();
+        }
+
+        // We avoid creating host action in device-only mode.
+        return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success;
+      } else if (CurPhase > phases::Backend) {
+        // If we are past the backend phase and still have a device action, we
+        // don't have to do anything as this action is already a device
+        // top-level action.
+        return ABRT_Success;
+      }
+
+      assert(CurPhase < phases::Backend && "Generating single CUDA "
+                                           "instructions should only occur "
+                                           "before the backend phase!");
+
+      // By default, we produce an action for each device arch.
+      for (Action *&A : CudaDeviceActions)
+        A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A);
+
+      return ABRT_Success;
+    }
+  };
+  /// \brief HIP action builder. It injects device code in the host backend
+  /// action.
+  class HIPActionBuilder final : public CudaActionBuilderBase {
+    /// The linker inputs obtained for each device arch.
+    SmallVector<ActionList, 8> DeviceLinkerInputs;
+
+  public:
+    HIPActionBuilder(Compilation &C, DerivedArgList &Args,
+                     const Driver::InputList &Inputs)
+        : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {}
+
+    bool canUseBundlerUnbundler() const override { return true; }
+
+    ActionBuilderReturnCode
+    getDeviceDependences(OffloadAction::DeviceDependences &DA,
+                         phases::ID CurPhase, phases::ID FinalPhase,
+                         PhasesTy &Phases) override {
+      // amdgcn does not support linking of object files, therefore we skip
+      // backend and assemble phases to output LLVM IR.
+      if (CudaDeviceActions.empty() || CurPhase == phases::Backend ||
+          CurPhase == phases::Assemble)
+        return ABRT_Success;
+
+      assert((CurPhase == phases::Link ||
+              CudaDeviceActions.size() == GpuArchList.size()) &&
+             "Expecting one action per GPU architecture.");
+      assert(!CompileHostOnly &&
+             "Not expecting CUDA actions in host-only compilation.");
+
+      // Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch.
+      // This happens to each device action originated from each input file.
+      // Later on, device actions in DeviceLinkerInputs are used to create
+      // device link actions in appendLinkDependences and the created device
+      // link actions are passed to the offload action as device dependence.
+      if (CurPhase == phases::Link) {
+        DeviceLinkerInputs.resize(CudaDeviceActions.size());
+        auto LI = DeviceLinkerInputs.begin();
+        for (auto *A : CudaDeviceActions) {
+          LI->push_back(A);
+          ++LI;
+        }
+
+        // We will pass the device action as a host dependence, so we don't
+        // need to do anything else with them.
+        CudaDeviceActions.clear();
+        return ABRT_Success;
+      }
+
+      // By default, we produce an action for each device arch.
+      for (Action *&A : CudaDeviceActions)
+        A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
+                                               AssociatedOffloadKind);
+
+      return ABRT_Success;
+    }
+
+    void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
+      // Append a new link action for each device.
+      unsigned I = 0;
+      for (auto &LI : DeviceLinkerInputs) {
+        auto *DeviceLinkAction =
+            C.MakeAction<LinkJobAction>(LI, types::TY_Image);
+        DA.add(*DeviceLinkAction, *ToolChains[0],
+               CudaArchToString(GpuArchList[I]), AssociatedOffloadKind);
+        ++I;
+      }
+    }
+  };
+
   /// OpenMP action builder. The host bitcode is passed to the device frontend
   /// and all the device linked images are passed to the host link phase.
   class OpenMPActionBuilder final : public DeviceActionBuilder {
@@ -2255,6 +2695,9 @@ public:
     // Create a specialized builder for CUDA.
     SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs));
 
+    // Create a specialized builder for HIP.
+    SpecializedBuilders.push_back(new HIPActionBuilder(C, Args, Inputs));
+
     // Create a specialized builder for OpenMP.
     SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));
 
@@ -2549,22 +2992,6 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
     Args.eraseArg(options::OPT__SLASH_Yu);
     YcArg = YuArg = nullptr;
   }
-  if (YcArg || YuArg) {
-    StringRef Val = YcArg ? YcArg->getValue() : YuArg->getValue();
-    bool FoundMatchingInclude = false;
-    for (const Arg *Inc : Args.filtered(options::OPT_include)) {
-      // FIXME: Do case-insensitive matching and consider / and \ as equal.
-      if (Inc->getValue() == Val)
-        FoundMatchingInclude = true;
-    }
-    if (!FoundMatchingInclude) {
-      Diag(clang::diag::warn_drv_ycyu_no_fi_arg_clang_cl)
-          << (YcArg ? YcArg : YuArg)->getSpelling();
-      Args.eraseArg(options::OPT__SLASH_Yc);
-      Args.eraseArg(options::OPT__SLASH_Yu);
-      YcArg = YuArg = nullptr;
-    }
-  }
   if (YcArg && Inputs.size() > 1) {
     Diag(clang::diag::warn_drv_yc_multiple_inputs_clang_cl);
     Args.eraseArg(options::OPT__SLASH_Yc);
@@ -2597,6 +3024,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
     // this compilation, warn the user about it.
     phases::ID InitialPhase = PL[0];
     if (InitialPhase > FinalPhase) {
+      if (InputArg->isClaimed())
+        continue;
+
       // Claim here to avoid the more general unused warning.
       InputArg->claim();
 
@@ -2631,11 +3061,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
         const types::ID HeaderType = lookupHeaderTypeForSourceType(InputType);
         llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PCHPL;
         types::getCompilationPhases(HeaderType, PCHPL);
-        Arg *PchInputArg = MakeInputArg(Args, *Opts, YcArg->getValue());
-
         // Build the pipeline for the pch file.
         Action *ClangClPch =
-            C.MakeAction<InputAction>(*PchInputArg, HeaderType);
+            C.MakeAction<InputAction>(*InputArg, HeaderType);
         for (phases::ID Phase : PCHPL)
           ClangClPch = ConstructPhaseAction(C, Args, Phase, ClangClPch);
         assert(ClangClPch);
@@ -2643,6 +3071,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
         // The driver currently exits after the first failed command.  This
         // relies on that behavior, to make sure if the pch generation fails,
         // the main compilation won't run.
+        // FIXME: If the main compilation fails, the PCH generation should
+        // probably not be considered successful either.
       }
     }
 
@@ -2725,8 +3155,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
   Args.ClaimAllArgs(options::OPT_cuda_compile_host_device);
 }
 
-Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
-                                     phases::ID Phase, Action *Input) const {
+Action *Driver::ConstructPhaseAction(
+    Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input,
+    Action::OffloadKind TargetDeviceOffloadKind) const {
   llvm::PrettyStackTraceString CrashInfo("Constructing phase actions");
 
   // Some types skip the assembler phase (e.g., llvm-bc), but we can't
@@ -2788,7 +3219,7 @@ Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args,
     return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
   }
   case phases::Backend: {
-    if (isUsingLTO()) {
+    if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) {
       types::ID Output =
           Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC;
       return C.MakeAction<BackendJobAction>(Input, Output);
@@ -3084,19 +3515,34 @@ class ToolSelector final {
   const Tool *combineBackendCompile(ArrayRef<JobActionInfo> ActionInfo,
                                     const ActionList *&Inputs,
                                     ActionList &CollapsedOffloadAction) {
-    if (ActionInfo.size() < 2 || !canCollapsePreprocessorAction())
+    if (ActionInfo.size() < 2)
       return nullptr;
     auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[0].JA);
     auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[1].JA);
     if (!BJ || !CJ)
       return nullptr;
 
+    // Check if the initial input (to the compile job or its predessor if one
+    // exists) is LLVM bitcode. In that case, no preprocessor step is required
+    // and we can still collapse the compile and backend jobs when we have
+    // -save-temps. I.e. there is no need for a separate compile job just to
+    // emit unoptimized bitcode.
+    bool InputIsBitcode = true;
+    for (size_t i = 1; i < ActionInfo.size(); i++)
+      if (ActionInfo[i].JA->getType() != types::TY_LLVM_BC &&
+          ActionInfo[i].JA->getType() != types::TY_LTO_BC) {
+        InputIsBitcode = false;
+        break;
+      }
+    if (!InputIsBitcode && !canCollapsePreprocessorAction())
+      return nullptr;
+
     // Get compiler tool.
     const Tool *T = TC.SelectTool(*CJ);
     if (!T)
       return nullptr;
 
-    if (T->canEmitIR() && (SaveTemps || EmbedBitcode))
+    if (T->canEmitIR() && ((SaveTemps && !InputIsBitcode) || EmbedBitcode))
       return nullptr;
 
     Inputs = &CJ->getInputs();
@@ -3312,7 +3758,7 @@ InputInfo Driver::BuildJobsForActionNoCache(
 
     if (!ArchName.empty())
       TC = &getToolChain(C.getArgs(),
-                         computeTargetTriple(*this, DefaultTargetTriple,
+                         computeTargetTriple(*this, TargetTriple,
                                              C.getArgs(), ArchName));
     else
       TC = &C.getDefaultToolChain();
@@ -3407,18 +3853,30 @@ InputInfo Driver::BuildJobsForActionNoCache(
           UI.DependentToolChain->getTriple().normalize(),
           /*CreatePrefixForHost=*/true);
       auto CurI = InputInfo(
-          UA, GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch,
-                                 /*AtTopLevel=*/false, MultipleArchs,
-                                 OffloadingPrefix),
+          UA,
+          GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch,
+                             /*AtTopLevel=*/false,
+                             MultipleArchs ||
+                                 UI.DependentOffloadKind == Action::OFK_HIP,
+                             OffloadingPrefix),
           BaseInput);
       // Save the unbundling result.
       UnbundlingResults.push_back(CurI);
 
       // Get the unique string identifier for this dependence and cache the
       // result.
-      CachedResults[{A, GetTriplePlusArchString(
-                            UI.DependentToolChain, BoundArch,
-                            UI.DependentOffloadKind)}] = CurI;
+      StringRef Arch;
+      if (TargetDeviceOffloadKind == Action::OFK_HIP) {
+        if (UI.DependentOffloadKind == Action::OFK_Host)
+          Arch = StringRef();
+        else
+          Arch = UI.DependentBoundArch;
+      } else
+        Arch = BoundArch;
+
+      CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch,
+                                                UI.DependentOffloadKind)}] =
+          CurI;
     }
 
     // Now that we have all the results generated, select the one that should be
@@ -3478,11 +3936,11 @@ InputInfo Driver::BuildJobsForActionNoCache(
 }
 
 const char *Driver::getDefaultImageName() const {
-  llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple));
+  llvm::Triple Target(llvm::Triple::normalize(TargetTriple));
   return Target.isOSWindows() ? "a.exe" : "a.out";
 }
 
-/// \brief Create output filename based on ArgValue, which could either be a
+/// Create output filename based on ArgValue, which could either be a
 /// full filename, filename without extension, or a directory. If ArgValue
 /// does not provide a filename, then use BaseName, and use the extension
 /// suitable for FileType.
@@ -3540,8 +3998,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
   }
 
   // Default to writing to stdout?
-  if (AtTopLevel && !CCGenDiagnostics &&
-      (isa<PreprocessJobAction>(JA) || JA.getType() == types::TY_ModuleFile))
+  if (AtTopLevel && !CCGenDiagnostics && isa<PreprocessJobAction>(JA))
     return "-";
 
   // Is this the assembly listing for /FA?
@@ -3562,8 +4019,22 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
       CCGenDiagnostics) {
     StringRef Name = llvm::sys::path::filename(BaseInput);
     std::pair<StringRef, StringRef> Split = Name.split('.');
-    std::string TmpName = GetTemporaryPath(
-        Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode()));
+    SmallString<128> TmpName;
+    const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode());
+    Arg *A = C.getArgs().getLastArg(options::OPT_fcrash_diagnostics_dir);
+    if (CCGenDiagnostics && A) {
+      SmallString<128> CrashDirectory(A->getValue());
+      llvm::sys::path::append(CrashDirectory, Split.first);
+      const char *Middle = Suffix ? "-%%%%%%." : "-%%%%%%";
+      std::error_code EC =
+          llvm::sys::fs::createUniqueFile(CrashDirectory + Middle + Suffix, TmpName);
+      if (EC) {
+        Diag(clang::diag::err_unable_to_make_temp) << EC.message();
+        return "";
+      }
+    } else {
+      TmpName = GetTemporaryPath(Split.first, Suffix);
+    }
     return C.addTempFile(C.getArgs().MakeArgString(TmpName));
   }
 
@@ -3717,14 +4188,14 @@ std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const {
 void Driver::generatePrefixedToolNames(
     StringRef Tool, const ToolChain &TC,
     SmallVectorImpl<std::string> &Names) const {
-  // FIXME: Needs a better variable than DefaultTargetTriple
-  Names.emplace_back((DefaultTargetTriple + "-" + Tool).str());
+  // FIXME: Needs a better variable than TargetTriple
+  Names.emplace_back((TargetTriple + "-" + Tool).str());
   Names.emplace_back(Tool);
 
   // Allow the discovery of tools prefixed with LLVM's default target triple.
-  std::string LLVMDefaultTargetTriple = llvm::sys::getDefaultTargetTriple();
-  if (LLVMDefaultTargetTriple != DefaultTargetTriple)
-    Names.emplace_back((LLVMDefaultTargetTriple + "-" + Tool).str());
+  std::string DefaultTargetTriple = llvm::sys::getDefaultTargetTriple();
+  if (DefaultTargetTriple != TargetTriple)
+    Names.emplace_back((DefaultTargetTriple + "-" + Tool).str());
 }
 
 static bool ScanDirForExecutable(SmallString<128> &Dir,
@@ -3795,6 +4266,9 @@ std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const {
     // extension of .pch is assumed. "
     if (!llvm::sys::path::has_extension(Output))
       Output += ".pch";
+  } else if (Arg *YcArg = C.getArgs().getLastArg(options::OPT__SLASH_Yc)) {
+    Output = YcArg->getValue();
+    llvm::sys::path::replace_extension(Output, ".pch");
   } else {
     Output = BaseName;
     llvm::sys::path::replace_extension(Output, ".pch");