diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp | 265 |
1 files changed, 265 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp new file mode 100644 index 0000000000000..74fe0cdd1ea7f --- /dev/null +++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp @@ -0,0 +1,265 @@ +//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Performs general IR level optimizations on SVE intrinsics. +// +// The main goal of this pass is to remove unnecessary reinterpret +// intrinsics (llvm.aarch64.sve.convert.[to|from].svbool), e.g: +// +// %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a) +// %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1) +// +// This pass also looks for ptest intrinsics & phi instructions where the +// operands are being needlessly converted to and from svbool_t. +// +//===----------------------------------------------------------------------===// + +#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "sve-intrinsic-opts" + +namespace llvm { +void initializeSVEIntrinsicOptsPass(PassRegistry &); +} + +namespace { +struct SVEIntrinsicOpts : public ModulePass { + static char ID; // Pass identification, replacement for typeid + SVEIntrinsicOpts() : ModulePass(ID) { + initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + static IntrinsicInst *isReinterpretToSVBool(Value *V); + + static bool optimizeIntrinsic(Instruction *I); + + bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions); + + static bool optimizeConvertFromSVBool(IntrinsicInst *I); + static bool optimizePTest(IntrinsicInst *I); + + static bool processPhiNode(IntrinsicInst *I); +}; +} // end anonymous namespace + +void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); + AU.setPreservesCFG(); +} + +char SVEIntrinsicOpts::ID = 0; +static const char *name = "SVE intrinsics optimizations"; +INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); +INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) + +namespace llvm { +ModulePass *createSVEIntrinsicOptsPass() { return new SVEIntrinsicOpts(); } +} // namespace llvm + +/// Returns V if it's a cast from <n x 16 x i1> (aka svbool_t), nullptr +/// otherwise. +IntrinsicInst *SVEIntrinsicOpts::isReinterpretToSVBool(Value *V) { + IntrinsicInst *I = dyn_cast<IntrinsicInst>(V); + if (!I) + return nullptr; + + if (I->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool) + return nullptr; + + return I; +} + +/// The function will remove redundant reinterprets casting in the presence +/// of the control flow +bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) { + + SmallVector<Instruction *, 32> Worklist; + auto RequiredType = X->getType(); + + auto *PN = dyn_cast<PHINode>(X->getArgOperand(0)); + assert(PN && "Expected Phi Node!"); + + // Don't create a new Phi unless we can remove the old one. + if (!PN->hasOneUse()) + return false; + + for (Value *IncValPhi : PN->incoming_values()) { + auto *Reinterpret = isReinterpretToSVBool(IncValPhi); + if (!Reinterpret || + RequiredType != Reinterpret->getArgOperand(0)->getType()) + return false; + } + + // Create the new Phi + LLVMContext &Ctx = PN->getContext(); + IRBuilder<> Builder(Ctx); + Builder.SetInsertPoint(PN); + PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues()); + Worklist.push_back(PN); + + for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) { + auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I)); + NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I)); + Worklist.push_back(Reinterpret); + } + + // Cleanup Phi Node and reinterprets + X->replaceAllUsesWith(NPN); + X->eraseFromParent(); + + for (auto &I : Worklist) + if (I->use_empty()) + I->eraseFromParent(); + + return true; +} + +bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) { + IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0)); + IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(I->getArgOperand(1)); + + if (Op1 && Op2 && + Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && + Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && + Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) { + + Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)}; + Type *Tys[] = {Op1->getArgOperand(0)->getType()}; + Module *M = I->getParent()->getParent()->getParent(); + + auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys); + auto CI = CallInst::Create(Fn, Ops, I->getName(), I); + + I->replaceAllUsesWith(CI); + I->eraseFromParent(); + if (Op1->use_empty()) + Op1->eraseFromParent(); + if (Op2->use_empty()) + Op2->eraseFromParent(); + + return true; + } + + return false; +} + +bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) { + assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool && + "Unexpected opcode"); + + // If the reinterpret instruction operand is a PHI Node + if (isa<PHINode>(I->getArgOperand(0))) + return processPhiNode(I); + + // If we have a reinterpret intrinsic I of type A which is converting from + // another reinterpret Y of type B, and the source type of Y is A, then we can + // elide away both reinterprets if there are no other users of Y. + auto *Y = isReinterpretToSVBool(I->getArgOperand(0)); + if (!Y) + return false; + + Value *SourceVal = Y->getArgOperand(0); + if (I->getType() != SourceVal->getType()) + return false; + + I->replaceAllUsesWith(SourceVal); + I->eraseFromParent(); + if (Y->use_empty()) + Y->eraseFromParent(); + + return true; +} + +bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) { + IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I); + if (!IntrI) + return false; + + switch (IntrI->getIntrinsicID()) { + case Intrinsic::aarch64_sve_convert_from_svbool: + return optimizeConvertFromSVBool(IntrI); + case Intrinsic::aarch64_sve_ptest_any: + case Intrinsic::aarch64_sve_ptest_first: + case Intrinsic::aarch64_sve_ptest_last: + return optimizePTest(IntrI); + default: + return false; + } + + return true; +} + +bool SVEIntrinsicOpts::optimizeFunctions( + SmallSetVector<Function *, 4> &Functions) { + bool Changed = false; + for (auto *F : Functions) { + DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree(); + + // Traverse the DT with an rpo walk so we see defs before uses, allowing + // simplification to be done incrementally. + BasicBlock *Root = DT->getRoot(); + ReversePostOrderTraversal<BasicBlock *> RPOT(Root); + for (auto *BB : RPOT) + for (Instruction &I : make_early_inc_range(*BB)) + Changed |= optimizeIntrinsic(&I); + } + return Changed; +} + +bool SVEIntrinsicOpts::runOnModule(Module &M) { + bool Changed = false; + SmallSetVector<Function *, 4> Functions; + + // Check for SVE intrinsic declarations first so that we only iterate over + // relevant functions. Where an appropriate declaration is found, store the + // function(s) where it is used so we can target these only. + for (auto &F : M.getFunctionList()) { + if (!F.isDeclaration()) + continue; + + switch (F.getIntrinsicID()) { + case Intrinsic::aarch64_sve_convert_from_svbool: + case Intrinsic::aarch64_sve_ptest_any: + case Intrinsic::aarch64_sve_ptest_first: + case Intrinsic::aarch64_sve_ptest_last: + for (auto I = F.user_begin(), E = F.user_end(); I != E;) { + auto *Inst = dyn_cast<Instruction>(*I++); + Functions.insert(Inst->getFunction()); + } + break; + default: + break; + } + } + + if (!Functions.empty()) + Changed |= optimizeFunctions(Functions); + + return Changed; +} |