diff options
Diffstat (limited to 'clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp')
-rw-r--r-- | clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 281 |
1 files changed, 42 insertions, 239 deletions
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index efcd565b510b..1a8e4294713c 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -29,230 +29,6 @@ using namespace clang::driver::tools; using namespace clang; using namespace llvm::opt; -namespace { - -static const char *getOutputFileName(Compilation &C, StringRef Base, - const char *Postfix, - const char *Extension) { - const char *OutputFileName; - if (C.getDriver().isSaveTempsEnabled()) { - OutputFileName = - C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension); - } else { - std::string TmpName = - C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension); - OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName)); - } - return OutputFileName; -} - -static void addLLCOptArg(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) { - if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { - StringRef OOpt = "0"; - if (A->getOption().matches(options::OPT_O4) || - A->getOption().matches(options::OPT_Ofast)) - OOpt = "3"; - else if (A->getOption().matches(options::OPT_O0)) - OOpt = "0"; - else if (A->getOption().matches(options::OPT_O)) { - // Clang and opt support -Os/-Oz; llc only supports -O0, -O1, -O2 and -O3 - // so we map -Os/-Oz to -O2. - // Only clang supports -Og, and maps it to -O1. - // We map anything else to -O2. - OOpt = llvm::StringSwitch<const char *>(A->getValue()) - .Case("1", "1") - .Case("2", "2") - .Case("3", "3") - .Case("s", "2") - .Case("z", "2") - .Case("g", "1") - .Default("0"); - } - CmdArgs.push_back(Args.MakeArgString("-O" + OOpt)); - } -} - -static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC, - std::string &GPUArch) { - if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) { - std::string ErrMsg = - llvm::formatv("{0}", llvm::fmt_consume(std::move(Err))); - TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg; - return false; - } - - return true; -} -} // namespace - -const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand( - const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC, Compilation &C, - const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, - StringRef SubArchName, StringRef OutputFilePrefix) const { - ArgStringList CmdArgs; - - for (const auto &II : Inputs) - if (II.isFilename()) - CmdArgs.push_back(II.getFilename()); - - bool HasLibm = false; - if (Args.hasArg(options::OPT_l)) { - auto Lm = Args.getAllArgValues(options::OPT_l); - for (auto &Lib : Lm) { - if (Lib == "m") { - HasLibm = true; - break; - } - } - - if (HasLibm) { - // This is not certain to work. The device libs added here, and passed to - // llvm-link, are missing attributes that they expect to be inserted when - // passed to mlink-builtin-bitcode. The amdgpu backend does not generate - // conservatively correct code when attributes are missing, so this may - // be the root cause of miscompilations. Passing via mlink-builtin-bitcode - // ultimately hits CodeGenModule::addDefaultFunctionDefinitionAttributes - // on each function, see D28538 for context. - // Potential workarounds: - // - unconditionally link all of the device libs to every translation - // unit in clang via mlink-builtin-bitcode - // - build a libm bitcode file as part of the DeviceRTL and explictly - // mlink-builtin-bitcode the rocm device libs components at build time - // - drop this llvm-link fork in favour or some calls into LLVM, chosen - // to do basically the same work as llvm-link but with that call first - // - write an opt pass that sets that on every function it sees and pipe - // the device-libs bitcode through that on the way to this llvm-link - SmallVector<std::string, 12> BCLibs = - AMDGPUOpenMPTC.getCommonDeviceLibNames(Args, SubArchName.str()); - for (StringRef BCFile : BCLibs) - CmdArgs.push_back(Args.MakeArgString(BCFile)); - } - } - - AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn", - SubArchName, /*isBitCodeSDL=*/true, - /*postClangLink=*/false); - // Add an intermediate output file. - CmdArgs.push_back("-o"); - const char *OutputFileName = - getOutputFileName(C, OutputFilePrefix, "-linked", "bc"); - CmdArgs.push_back(OutputFileName); - const char *Exec = - Args.MakeArgString(getToolChain().GetProgramPath("llvm-link")); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, - InputInfo(&JA, Args.MakeArgString(OutputFileName)))); - - // If we linked in libm definitions late we run another round of optimizations - // to inline the definitions and fold what is foldable. - if (HasLibm) { - ArgStringList OptCmdArgs; - const char *OptOutputFileName = - getOutputFileName(C, OutputFilePrefix, "-linked-opt", "bc"); - addLLCOptArg(Args, OptCmdArgs); - OptCmdArgs.push_back(OutputFileName); - OptCmdArgs.push_back("-o"); - OptCmdArgs.push_back(OptOutputFileName); - const char *OptExec = - Args.MakeArgString(getToolChain().GetProgramPath("opt")); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), OptExec, OptCmdArgs, - InputInfo(&JA, Args.MakeArgString(OutputFileName)), - InputInfo(&JA, Args.MakeArgString(OptOutputFileName)))); - OutputFileName = OptOutputFileName; - } - - return OutputFileName; -} - -const char *AMDGCN::OpenMPLinker::constructLlcCommand( - Compilation &C, const JobAction &JA, const InputInfoList &Inputs, - const llvm::opt::ArgList &Args, llvm::StringRef SubArchName, - llvm::StringRef OutputFilePrefix, const char *InputFileName, - bool OutputIsAsm) const { - // Construct llc command. - ArgStringList LlcArgs; - // The input to llc is the output from opt. - LlcArgs.push_back(InputFileName); - // Pass optimization arg to llc. - addLLCOptArg(Args, LlcArgs); - LlcArgs.push_back("-mtriple=amdgcn-amd-amdhsa"); - LlcArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName)); - LlcArgs.push_back( - Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj"))); - - for (const Arg *A : Args.filtered(options::OPT_mllvm)) { - LlcArgs.push_back(A->getValue(0)); - } - - // Add output filename - LlcArgs.push_back("-o"); - const char *LlcOutputFile = - getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o"); - LlcArgs.push_back(LlcOutputFile); - const char *Llc = Args.MakeArgString(getToolChain().GetProgramPath("llc")); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Llc, LlcArgs, Inputs, - InputInfo(&JA, Args.MakeArgString(LlcOutputFile)))); - return LlcOutputFile; -} - -void AMDGCN::OpenMPLinker::constructLldCommand( - Compilation &C, const JobAction &JA, const InputInfoList &Inputs, - const InputInfo &Output, const llvm::opt::ArgList &Args, - const char *InputFileName) const { - // Construct lld command. - // The output from ld.lld is an HSA code object file. - ArgStringList LldArgs{"-flavor", "gnu", "--no-undefined", - "-shared", "-o", Output.getFilename(), - InputFileName}; - - const char *Lld = Args.MakeArgString(getToolChain().GetProgramPath("lld")); - C.addCommand(std::make_unique<Command>( - JA, *this, ResponseFileSupport::AtFileCurCP(), Lld, LldArgs, Inputs, - InputInfo(&JA, Args.MakeArgString(Output.getFilename())))); -} - -// For amdgcn the inputs of the linker job are device bitcode and output is -// object file. It calls llvm-link, opt, llc, then lld steps. -void AMDGCN::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA, - const InputInfo &Output, - const InputInfoList &Inputs, - const ArgList &Args, - const char *LinkingOutput) const { - const ToolChain &TC = getToolChain(); - assert(getToolChain().getTriple().isAMDGCN() && "Unsupported target"); - - const toolchains::AMDGPUOpenMPToolChain &AMDGPUOpenMPTC = - static_cast<const toolchains::AMDGPUOpenMPToolChain &>(TC); - - std::string GPUArch = Args.getLastArgValue(options::OPT_march_EQ).str(); - if (GPUArch.empty()) { - if (!checkSystemForAMDGPU(Args, AMDGPUOpenMPTC, GPUArch)) - return; - } - - // Prefix for temporary file name. - std::string Prefix; - for (const auto &II : Inputs) - if (II.isFilename()) - Prefix = llvm::sys::path::stem(II.getFilename()).str() + "-" + GPUArch; - assert(Prefix.length() && "no linker inputs are files "); - - // Each command outputs different files. - const char *LLVMLinkCommand = constructLLVMLinkCommand( - AMDGPUOpenMPTC, C, JA, Inputs, Args, GPUArch, Prefix); - - // Produce readable assembly if save-temps is enabled. - if (C.getDriver().isSaveTempsEnabled()) - constructLlcCommand(C, JA, Inputs, Args, GPUArch, Prefix, LLVMLinkCommand, - /*OutputIsAsm=*/true); - const char *LlcCommand = constructLlcCommand(C, JA, Inputs, Args, GPUArch, - Prefix, LLVMLinkCommand); - constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand); -} - AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D, const llvm::Triple &Triple, const ToolChain &HostTC, @@ -268,11 +44,8 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( Action::OffloadKind DeviceOffloadingKind) const { HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind); - std::string GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ).str(); - if (GPUArch.empty()) { - if (!checkSystemForAMDGPU(DriverArgs, *this, GPUArch)) - return; - } + StringRef GPUArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); + assert(!GPUArch.empty() && "Must have an explicit GPU arch."); assert(DeviceOffloadingKind == Action::OFK_OpenMP && "Only OpenMP offloading kinds are supported."); @@ -284,11 +57,15 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( if (DriverArgs.hasArg(options::OPT_nogpulib)) return; + for (auto BCFile : getDeviceLibs(DriverArgs)) { + CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" + : "-mlink-bitcode-file"); + CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); + } + // Link the bitcode library late if we're using device LTO. if (getDriver().isUsingLTO(/* IsOffload */ true)) return; - - addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, GPUArch, getTriple()); } llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs( @@ -307,9 +84,19 @@ llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs( DAL->append(A); if (!DAL->hasArg(options::OPT_march_EQ)) { - std::string Arch = BoundArch.str(); - if (BoundArch.empty()) - checkSystemForAMDGPU(Args, *this, Arch); + StringRef Arch = BoundArch; + if (Arch.empty()) { + auto ArchsOrErr = getSystemGPUArchs(Args); + if (!ArchsOrErr) { + std::string ErrMsg = + llvm::formatv("{0}", llvm::fmt_consume(ArchsOrErr.takeError())); + getDriver().Diag(diag::err_drv_undetermined_gpu_arch) + << llvm::Triple::getArchTypeName(getArch()) << ErrMsg << "-march"; + Arch = CudaArchToString(CudaArch::HIPDefault); + } else { + Arch = Args.MakeArgString(ArchsOrErr->front()); + } + } DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), Arch); } @@ -329,11 +116,6 @@ llvm::opt::DerivedArgList *AMDGPUOpenMPToolChain::TranslateArgs( return DAL; } -Tool *AMDGPUOpenMPToolChain::buildLinker() const { - assert(getTriple().isAMDGCN()); - return new tools::AMDGCN::OpenMPLinker(*this); -} - void AMDGPUOpenMPToolChain::addClangWarningOptions( ArgStringList &CC1Args) const { HostTC.addClangWarningOptions(CC1Args); @@ -372,3 +154,24 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D, const ArgList &Args) const { return HostTC.computeMSVCVersion(D, Args); } + +llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> +AMDGPUOpenMPToolChain::getDeviceLibs(const llvm::opt::ArgList &Args) const { + if (Args.hasArg(options::OPT_nogpulib)) + return {}; + + if (!RocmInstallation.hasDeviceLibrary()) { + getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; + return {}; + } + + StringRef GpuArch = getProcessorFromTargetID( + getTriple(), Args.getLastArgValue(options::OPT_march_EQ)); + + SmallVector<BitCodeLibraryInfo, 12> BCLibs; + for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(), + /*IsOpenMP=*/true)) + BCLibs.emplace_back(BCLib); + + return BCLibs; +} |