diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000 |
commit | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch) | |
tree | 599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp | |
parent | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff) |
Diffstat (limited to 'lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp new file mode 100644 index 000000000000..7a7addd0f5cf --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -0,0 +1,336 @@ +//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This pass propagates attributes from kernels to the non-entry +/// functions. Most of the library functions were not compiled for specific ABI, +/// yet will be correctly compiled if proper attributes are propagated from the +/// caller. +/// +/// The pass analyzes the call graph and propagates ABI target features through +/// the call graph. +/// +/// It can run in two modes: as a function or module pass. A function pass +/// simply propagates attributes. A module pass clones functions if there are +/// callers with different ABI. If a function is cloned all call sites will +/// be updated to use a correct clone. +/// +/// A function pass is limited in functionality but can run early in the +/// pipeline. A module pass is more powerful but has to run late, so misses +/// library folding opportunities. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include <string> + +#define DEBUG_TYPE "amdgpu-propagate-attributes" + +using namespace llvm; + +namespace llvm { +extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; +} + +namespace { + +class AMDGPUPropagateAttributes { + const FeatureBitset TargetFeatures = { + AMDGPU::FeatureWavefrontSize16, + AMDGPU::FeatureWavefrontSize32, + AMDGPU::FeatureWavefrontSize64 + }; + + class Clone{ + public: + Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) : + FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {} + + FeatureBitset FeatureMask; + Function *OrigF; + Function *NewF; + }; + + const TargetMachine *TM; + + // Clone functions as needed or just set attributes. + bool AllowClone; + + // Option propagation roots. + SmallSet<Function *, 32> Roots; + + // Clones of functions with their attributes. + SmallVector<Clone, 32> Clones; + + // Find a clone with required features. + Function *findFunction(const FeatureBitset &FeaturesNeeded, + Function *OrigF); + + // Clone function F and set NewFeatures on the clone. + // Cole takes the name of original function. + Function *cloneWithFeatures(Function &F, + const FeatureBitset &NewFeatures); + + // Set new function's features in place. + void setFeatures(Function &F, const FeatureBitset &NewFeatures); + + std::string getFeatureString(const FeatureBitset &Features) const; + + // Propagate attributes from Roots. 
+ bool process(); + +public: + AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : + TM(TM), AllowClone(AllowClone) {} + + // Use F as a root and propagate its attributes. + bool process(Function &F); + + // Propagate attributes starting from kernel functions. + bool process(Module &M); +}; + +// Allows to propagate attributes early, but no clonning is allowed as it must +// be a function pass to run before any optimizations. +// TODO: We shall only need a one instance of module pass, but that needs to be +// in the linker pipeline which is currently not possible. +class AMDGPUPropagateAttributesEarly : public FunctionPass { + const TargetMachine *TM; + +public: + static char ID; // Pass identification + + AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : + FunctionPass(ID), TM(TM) { + initializeAMDGPUPropagateAttributesEarlyPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; +}; + +// Allows to propagate attributes with clonning but does that late in the +// pipeline. +class AMDGPUPropagateAttributesLate : public ModulePass { + const TargetMachine *TM; + +public: + static char ID; // Pass identification + + AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : + ModulePass(ID), TM(TM) { + initializeAMDGPUPropagateAttributesLatePass( + *PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; +}; + +} // end anonymous namespace. 
+ +char AMDGPUPropagateAttributesEarly::ID = 0; +char AMDGPUPropagateAttributesLate::ID = 0; + +INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, + "amdgpu-propagate-attributes-early", + "Early propagate attributes from kernels to functions", + false, false) +INITIALIZE_PASS(AMDGPUPropagateAttributesLate, + "amdgpu-propagate-attributes-late", + "Late propagate attributes from kernels to functions", + false, false) + +Function * +AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded, + Function *OrigF) { + // TODO: search for clone's clones. + for (Clone &C : Clones) + if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask) + return C.NewF; + + return nullptr; +} + +bool AMDGPUPropagateAttributes::process(Module &M) { + for (auto &F : M.functions()) + if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) + Roots.insert(&F); + + return process(); +} + +bool AMDGPUPropagateAttributes::process(Function &F) { + Roots.insert(&F); + return process(); +} + +bool AMDGPUPropagateAttributes::process() { + bool Changed = false; + SmallSet<Function *, 32> NewRoots; + SmallSet<Function *, 32> Replaced; + + if (Roots.empty()) + return false; + Module &M = *(*Roots.begin())->getParent(); + + do { + Roots.insert(NewRoots.begin(), NewRoots.end()); + NewRoots.clear(); + + for (auto &F : M.functions()) { + if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F)) + continue; + + const FeatureBitset &CalleeBits = + TM->getSubtargetImpl(F)->getFeatureBits(); + SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; + + for (User *U : F.users()) { + Instruction *I = dyn_cast<Instruction>(U); + if (!I) + continue; + CallBase *CI = dyn_cast<CallBase>(I); + if (!CI) + continue; + Function *Caller = CI->getCaller(); + if (!Caller) + continue; + if (!Roots.count(Caller)) + continue; + + const FeatureBitset &CallerBits = + TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures; + + if (CallerBits == (CalleeBits & TargetFeatures)) { + NewRoots.insert(&F); + 
continue; + } + + Function *NewF = findFunction(CallerBits, &F); + if (!NewF) { + FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) | + CallerBits); + if (!AllowClone) { + // This may set different features on different iteartions if + // there is a contradiction in callers' attributes. In this case + // we rely on a second pass running on Module, which is allowed + // to clone. + setFeatures(F, NewFeatures); + NewRoots.insert(&F); + Changed = true; + break; + } + + NewF = cloneWithFeatures(F, NewFeatures); + Clones.push_back(Clone(CallerBits, &F, NewF)); + NewRoots.insert(NewF); + } + + ToReplace.push_back(std::make_pair(CI, NewF)); + Replaced.insert(&F); + + Changed = true; + } + + while (!ToReplace.empty()) { + auto R = ToReplace.pop_back_val(); + R.first->setCalledFunction(R.second); + } + } + } while (!NewRoots.empty()); + + for (Function *F : Replaced) { + if (F->use_empty()) + F->eraseFromParent(); + } + + return Changed; +} + +Function * +AMDGPUPropagateAttributes::cloneWithFeatures(Function &F, + const FeatureBitset &NewFeatures) { + LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); + + ValueToValueMapTy dummy; + Function *NewF = CloneFunction(&F, dummy); + setFeatures(*NewF, NewFeatures); + + // Swap names. If that is the only clone it will retain the name of now + // dead value. + if (F.hasName()) { + std::string NewName = NewF->getName(); + NewF->takeName(&F); + F.setName(NewName); + + // Name has changed, it does not need an external symbol. 
+ F.setVisibility(GlobalValue::DefaultVisibility); + F.setLinkage(GlobalValue::InternalLinkage); + } + + return NewF; +} + +void AMDGPUPropagateAttributes::setFeatures(Function &F, + const FeatureBitset &NewFeatures) { + std::string NewFeatureStr = getFeatureString(NewFeatures); + + LLVM_DEBUG(dbgs() << "Set features " + << getFeatureString(NewFeatures & TargetFeatures) + << " on " << F.getName() << '\n'); + + F.removeFnAttr("target-features"); + F.addFnAttr("target-features", NewFeatureStr); +} + +std::string +AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const +{ + std::string Ret; + for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { + if (Features[KV.Value]) + Ret += (StringRef("+") + KV.Key + ",").str(); + else if (TargetFeatures[KV.Value]) + Ret += (StringRef("-") + KV.Key + ",").str(); + } + Ret.pop_back(); // Remove last comma. + return Ret; +} + +bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { + if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) + return false; + + return AMDGPUPropagateAttributes(TM, false).process(F); +} + +bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { + if (!TM) + return false; + + return AMDGPUPropagateAttributes(TM, true).process(M); +} + +FunctionPass +*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { + return new AMDGPUPropagateAttributesEarly(TM); +} + +ModulePass +*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { + return new AMDGPUPropagateAttributesLate(TM); +} |