diff options
Diffstat (limited to 'clang/lib/Driver/ToolChains/AMDGPU.cpp')
-rw-r--r-- | clang/lib/Driver/ToolChains/AMDGPU.cpp | 483 |
1 files changed, 481 insertions, 2 deletions
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 71a2c68b4197..bc6d1fcd4a00 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -12,6 +12,8 @@
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/DriverDiagnostic.h"
 #include "llvm/Option/ArgList.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
 
 using namespace clang::driver;
 using namespace clang::driver::tools;
@@ -19,6 +21,327 @@ using namespace clang::driver::toolchains;
 using namespace clang;
 using namespace llvm::opt;
 
+void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
+  assert(!Path.empty());
+
+  const StringRef Suffix(".bc");
+  const StringRef Suffix2(".amdgcn.bc");
+
+  std::error_code EC;
+  for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
+       !EC && LI != LE; LI = LI.increment(EC)) {
+    StringRef FilePath = LI->path();
+    StringRef FileName = llvm::sys::path::filename(FilePath);
+    if (!FileName.endswith(Suffix))
+      continue;
+
+    StringRef BaseName;
+    if (FileName.endswith(Suffix2))
+      BaseName = FileName.drop_back(Suffix2.size());
+    else // Plain ".bc": the early continue above guarantees this suffix.
+      BaseName = FileName.drop_back(Suffix.size());
+
+    if (BaseName == "ocml") {
+      OCML = FilePath;
+    } else if (BaseName == "ockl") {
+      OCKL = FilePath;
+    } else if (BaseName == "opencl") {
+      OpenCL = FilePath;
+    } else if (BaseName == "hip") {
+      HIP = FilePath;
+    } else if (BaseName == "oclc_finite_only_off") {
+      FiniteOnly.Off = FilePath;
+    } else if (BaseName == "oclc_finite_only_on") {
+      FiniteOnly.On = FilePath;
+    } else if (BaseName == "oclc_daz_opt_on") {
+      DenormalsAreZero.On = FilePath;
+    } else if (BaseName == "oclc_daz_opt_off") {
+      DenormalsAreZero.Off = FilePath;
+    } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
+      CorrectlyRoundedSqrt.On = FilePath;
+    } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
+      CorrectlyRoundedSqrt.Off = FilePath;
+    } else if (BaseName == "oclc_unsafe_math_on") {
+      UnsafeMath.On = FilePath;
+    } else if (BaseName == "oclc_unsafe_math_off") {
+      UnsafeMath.Off = FilePath;
+    } else if (BaseName == "oclc_wavefrontsize64_on") {
+      WavefrontSize64.On = FilePath;
+    } else if (BaseName == "oclc_wavefrontsize64_off") {
+      WavefrontSize64.Off = FilePath;
+    } else {
+      // Process all bitcode filenames that look like
+      // oclc_isa_version_XXX.amdgcn.bc and key them by their "gfxXXX" name.
+      const StringRef DeviceLibPrefix = "oclc_isa_version_";
+      if (!BaseName.startswith(DeviceLibPrefix))
+        continue;
+
+      StringRef IsaVersionNumber =
+        BaseName.drop_front(DeviceLibPrefix.size());
+
+      // Render the key right away; a Twine must never be kept in a local.
+      SmallString<8> Tmp;
+      StringRef GfxName = (Twine("gfx") + IsaVersionNumber).toStringRef(Tmp);
+      LibDeviceMap.insert(std::make_pair(GfxName, FilePath.str()));
+    }
+  }
+}
+
+void RocmInstallationDetector::ParseHIPVersionFile(llvm::StringRef V) {
+  SmallVector<StringRef, 4> VersionParts;
+  V.split(VersionParts, '\n');
+  unsigned Major = 0; // Stays 0 when the key is absent or malformed.
+  unsigned Minor = 0; // Likewise; previously read uninitialized.
+  for (auto Part : VersionParts) {
+    auto Splits = Part.split('=');
+    if (Splits.first == "HIP_VERSION_MAJOR")
+      Splits.second.getAsInteger(0, Major);
+    else if (Splits.first == "HIP_VERSION_MINOR")
+      Splits.second.getAsInteger(0, Minor);
+    else if (Splits.first == "HIP_VERSION_PATCH")
+      VersionPatch = Splits.second.str();
+  }
+  VersionMajorMinor = llvm::VersionTuple(Major, Minor);
+  DetectedVersion =
+      (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
+}
+
+// For candidate specified by --rocm-path we do not do strict check.
+SmallVector<RocmInstallationDetector::Candidate, 4>
+RocmInstallationDetector::getInstallationPathCandidates() {
+  SmallVector<Candidate, 4> Candidates;
+  if (!RocmPathArg.empty()) {
+    Candidates.emplace_back(RocmPathArg.str());
+    return Candidates;
+  }
+
+  // Try to find relative to the compiler binary.
+  const char *InstallDir = D.getInstalledDir();
+
+  // Check both a normal Unix prefix position of the clang binary, as well as
+  // the Windows-esque layout the ROCm packages use with the host architecture
+  // subdirectory of bin.
+
+  // Strip off directory (usually bin)
+  StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
+  StringRef ParentName = llvm::sys::path::filename(ParentDir);
+
+  // Some builds use bin/{host arch}, so go up again.
+  if (ParentName == "bin") {
+    ParentDir = llvm::sys::path::parent_path(ParentDir);
+    ParentName = llvm::sys::path::filename(ParentDir);
+  }
+
+  // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
+  if (ParentName == "llvm")
+    ParentDir = llvm::sys::path::parent_path(ParentDir);
+
+  Candidates.emplace_back(ParentDir.str(), /*StrictChecking=*/true);
+
+  // Device library may be installed in clang resource directory.
+  Candidates.emplace_back(D.ResourceDir, /*StrictChecking=*/true);
+
+  Candidates.emplace_back(D.SysRoot + "/opt/rocm", /*StrictChecking=*/true);
+  return Candidates;
+}
+
+RocmInstallationDetector::RocmInstallationDetector(
+    const Driver &D, const llvm::Triple &HostTriple,
+    const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
+    : D(D) {
+  RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
+  RocmDeviceLibPathArg =
+      Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
+  if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
+    HIPVersionArg = A->getValue();
+    unsigned Major = ~0U; // Sentinel: left untouched when parsing fails.
+    unsigned Minor = ~0U; // Sentinel, so a genuine ".0" minor is accepted.
+    SmallVector<StringRef, 3> Parts;
+    HIPVersionArg.split(Parts, '.');
+    if (Parts.size())
+      Parts[0].getAsInteger(0, Major);
+    if (Parts.size() > 1)
+      Parts[1].getAsInteger(0, Minor);
+    if (Parts.size() > 2)
+      VersionPatch = Parts[2].str();
+    if (VersionPatch.empty())
+      VersionPatch = "0";
+    if (Major == ~0U || Minor == ~0U)
+      D.Diag(diag::err_drv_invalid_value)
+          << A->getAsString(Args) << HIPVersionArg;
+
+    VersionMajorMinor = llvm::VersionTuple(Major, Minor);
+    DetectedVersion =
+        (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
+  } else {
+    VersionPatch = DefaultVersionPatch;
+    VersionMajorMinor =
+        llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
+    DetectedVersion = (Twine(DefaultVersionMajor) + "." +
+                       Twine(DefaultVersionMinor) + "." + VersionPatch)
+                          .str();
+  }
+
+  if (DetectHIPRuntime)
+    detectHIPRuntime();
+  if (DetectDeviceLib)
+    detectDeviceLibrary();
+}
+
+void RocmInstallationDetector::detectDeviceLibrary() {
+  assert(LibDevicePath.empty());
+
+  if (!RocmDeviceLibPathArg.empty())
+    LibDevicePath = RocmDeviceLibPathArg.back(); // Last occurrence wins.
+  else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH"))
+    LibDevicePath = LibPathEnv;
+
+  auto &FS = D.getVFS();
+  if (!LibDevicePath.empty()) {
+    // Maintain compatibility with HIP flag/envvar pointing directly at the
+    // bitcode library directory. This points directly at the library path instead
+    // of the rocm root installation.
+    if (!FS.exists(LibDevicePath))
+      return;
+
+    scanLibDevicePath(LibDevicePath);
+    HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty();
+    return;
+  }
+
+  // The install path situation in old versions of ROCm is a real mess, and
+  // use a different install layout. Multiple copies of the device libraries
+  // exist for each frontend project, and differ depending on which build
+  // system produced the packages. Standalone OpenCL builds also have a
+  // different directory structure from the ROCm OpenCL package.
+  auto Candidates = getInstallationPathCandidates();
+  for (const auto &Candidate : Candidates) {
+    auto CandidatePath = Candidate.Path;
+
+    // Check device library exists at the given path.
+    auto CheckDeviceLib = [&](StringRef Path) {
+      bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
+      if (CheckLibDevice && !FS.exists(Path))
+        return false;
+
+      scanLibDevicePath(Path);
+
+      if (!NoBuiltinLibs) {
+        // Check that the required non-target libraries are all available.
+        if (!allGenericLibsValid())
+          return false;
+
+        // Check that we have found at least one libdevice that we can link in
+        // if -nobuiltinlib hasn't been specified.
+        if (LibDeviceMap.empty())
+          return false;
+      }
+      return true;
+    };
+
+    // The possible structures are:
+    // - ${ROCM_ROOT}/amdgcn/bitcode/*
+    // - ${ROCM_ROOT}/lib/*
+    // - ${ROCM_ROOT}/lib/bitcode/*
+    // so try to detect these layouts.
+    static llvm::SmallVector<const char *, 2> SubDirsList[] = {
+        {"amdgcn", "bitcode"},
+        {"lib"},
+        {"lib", "bitcode"},
+    };
+
+    // Make a path by appending sub-directories to a copy of CandidatePath.
+    auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) {
+      auto Path = CandidatePath;
+      for (auto SubDir : SubDirs)
+        llvm::sys::path::append(Path, SubDir);
+      return Path;
+    };
+
+    for (const auto &SubDirs : SubDirsList) {
+      LibDevicePath = MakePath(SubDirs);
+      HasDeviceLibrary = CheckDeviceLib(LibDevicePath);
+      if (HasDeviceLibrary)
+        return;
+    }
+  }
+}
+
+void RocmInstallationDetector::detectHIPRuntime() {
+  auto Candidates = getInstallationPathCandidates();
+  auto &FS = D.getVFS();
+
+  for (const auto &Candidate : Candidates) {
+    InstallPath = Candidate.Path;
+    if (InstallPath.empty() || !FS.exists(InstallPath))
+      continue;
+
+    BinPath = InstallPath;
+    llvm::sys::path::append(BinPath, "bin");
+    IncludePath = InstallPath;
+    llvm::sys::path::append(IncludePath, "include");
+    LibPath = InstallPath;
+    llvm::sys::path::append(LibPath, "lib");
+
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
+        FS.getBufferForFile(BinPath + "/.hipVersion");
+    if (!VersionFile && Candidate.StrictChecking)
+      continue;
+
+    if (HIPVersionArg.empty() && VersionFile)
+      ParseHIPVersionFile((*VersionFile)->getBuffer());
+
+    HasHIPRuntime = true;
+    return;
+  }
+  HasHIPRuntime = false;
+}
+
+void RocmInstallationDetector::print(raw_ostream &OS) const {
+  if (hasHIPRuntime())
+    OS << "Found HIP installation: " << InstallPath << ", version "
+       << DetectedVersion << '\n';
+}
+
+void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
+                                                 ArgStringList &CC1Args) const {
+  bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5);
+
+  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
+    // HIP header includes standard library wrapper headers under clang
+    // cuda_wrappers directory. Since these wrapper headers include_next
+    // standard C++ headers, whereas libc++ headers include_next other clang
+    // headers. The include paths have to follow this order:
+    // - wrapper include path
+    // - standard C++ include path
+    // - other clang include path
+    // Since standard C++ and other clang include paths are added in other
+    // places after this function, here we only need to make sure wrapper
+    // include path is added.
+    //
+    // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
+    // a workaround.
+    SmallString<128> P(D.ResourceDir);
+    if (UsesRuntimeWrapper)
+      llvm::sys::path::append(P, "include", "cuda_wrappers");
+    CC1Args.push_back("-internal-isystem");
+    CC1Args.push_back(DriverArgs.MakeArgString(P));
+  }
+
+  if (DriverArgs.hasArg(options::OPT_nogpuinc))
+    return;
+
+  if (!hasHIPRuntime()) {
+    D.Diag(diag::err_drv_no_hip_runtime);
+    return;
+  }
+
+  CC1Args.push_back("-internal-isystem");
+  CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
+  if (UsesRuntimeWrapper)
+    CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
+}
+
 void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
                                   const InputInfoList &Inputs,
@@ -31,8 +354,9 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   CmdArgs.push_back("-shared");
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
-  C.addCommand(std::make_unique<Command>(JA, *this, Args.MakeArgString(Linker),
-                                         CmdArgs, Inputs));
+  C.addCommand(
+      std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(),
+                                Args.MakeArgString(Linker), CmdArgs, Inputs));
 }
 
 void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
@@ -102,6 +426,73 @@ AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
   return DAL;
 }
 
+bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
+    llvm::AMDGPU::GPUKind Kind) {
+
+  // Assume nothing without a specific target.
+  if (Kind == llvm::AMDGPU::GK_NONE)
+    return false;
+
+  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
+
+  // Enable f32 denormals by default on subtargets where fma is fast with
+  // denormals; flush to zero (DAZ) everywhere else.
+  const bool BothDenormAndFMAFast =
+      (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
+      (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
+  return !BothDenormAndFMAFast;
+}
+
+llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
+    const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
+    const llvm::fltSemantics *FPType) const {
+  // Denormals should always be enabled for f16 and f64.
+  if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
+    return llvm::DenormalMode::getIEEE();
+
+  if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
+      JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
+    auto Kind = llvm::AMDGPU::parseArchAMDGCN(JA.getOffloadingArch());
+    if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
+        DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
+                           options::OPT_fno_cuda_flush_denormals_to_zero,
+                           getDefaultDenormsAreZeroForTarget(Kind)))
+      return llvm::DenormalMode::getPreserveSign();
+
+    return llvm::DenormalMode::getIEEE();
+  }
+
+  const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
+
+  // TODO: There are way too many flags that change this. Do we need to check
+  // them all?
+  bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
+             getDefaultDenormsAreZeroForTarget(Kind);
+
+  // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
+  // also implicitly treated as zero (DAZ).
+  return DAZ ? llvm::DenormalMode::getPreserveSign() :
+               llvm::DenormalMode::getIEEE();
+}
+
+bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
+                               llvm::AMDGPU::GPUKind Kind) {
+  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
+  // Recomputed on every call: the answer depends on Kind, so no static cache.
+  const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
+  return !HasWave32 || DriverArgs.hasFlag(
+    options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
+}
+
+
+/// ROCM Toolchain
+ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
+                             const ArgList &Args)
+    : AMDGPUToolChain(D, Triple, Args) {
+  RocmInstallation.detectDeviceLibrary();
+}
+
 void AMDGPUToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs,
     llvm::opt::ArgStringList &CC1Args,
@@ -115,3 +506,91 @@ void AMDGPUToolChain::addClangTargetOptions(
     CC1Args.push_back("-fapply-global-visibility-to-externs");
   }
 }
+
+void ROCMToolChain::addClangTargetOptions(
+    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+    Action::OffloadKind DeviceOffloadingKind) const {
+  AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
+                                         DeviceOffloadingKind);
+
+  // For the OpenCL case where there is no offload target, accept -nostdlib to
+  // disable bitcode linking.
+  if (DeviceOffloadingKind == Action::OFK_None &&
+      DriverArgs.hasArg(options::OPT_nostdlib))
+    return;
+
+  if (DriverArgs.hasArg(options::OPT_nogpulib))
+    return;
+
+  if (!RocmInstallation.hasDeviceLibrary()) {
+    getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
+    return;
+  }
+
+  // Get the device name and canonicalize it
+  const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
+  const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
+  std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
+  if (LibDeviceFile.empty()) {
+    getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
+    return;
+  }
+
+  bool Wave64 = isWave64(DriverArgs, Kind);
+
+  // TODO: There are way too many flags that change this. Do we need to check
+  // them all?
+  bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
+             getDefaultDenormsAreZeroForTarget(Kind);
+  bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
+
+  bool UnsafeMathOpt =
+      DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
+  bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
+  bool CorrectSqrt =
+      DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
+
+  // Add the OpenCL specific bitcode library.
+  CC1Args.push_back("-mlink-builtin-bitcode");
+  CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
+
+  // Add the generic set of libraries.
+  RocmInstallation.addCommonBitcodeLibCC1Args(
+      DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
+      UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
+}
+
+void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
+    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
+    StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
+    bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
+  static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
+
+  CC1Args.push_back(LinkBitcodeFlag);
+  CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath()));
+
+  CC1Args.push_back(LinkBitcodeFlag);
+  CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath()));
+
+  CC1Args.push_back(LinkBitcodeFlag);
+  CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ)));
+
+  CC1Args.push_back(LinkBitcodeFlag);
+  CC1Args.push_back(DriverArgs.MakeArgString(
+      getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)));
+
+  CC1Args.push_back(LinkBitcodeFlag);
+  CC1Args.push_back(DriverArgs.MakeArgString(
+      getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)));
+
+  CC1Args.push_back(LinkBitcodeFlag);
+  CC1Args.push_back(
+      DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt)));
+
+  CC1Args.push_back(LinkBitcodeFlag);
+  CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64)));
+
+  CC1Args.push_back(LinkBitcodeFlag);
+  CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
+}