diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp')
| -rw-r--r-- | contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp | 748 |
1 files changed, 748 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp new file mode 100644 index 000000000000..e5df1d456c1e --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -0,0 +1,748 @@ +//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass removes the computation of provably redundant expressions that have +// been computed earlier in a previous iteration. It relies on the use of PHIs +// to identify loop carried dependences. This is scalar replacement for vector +// types. +// +//----------------------------------------------------------------------------- +// Motivation: Consider the case where we have the following loop structure. +// +// Loop: +// t0 = a[i]; +// t1 = f(t0); +// t2 = g(t1); +// ... +// t3 = a[i+1]; +// t4 = f(t3); +// t5 = g(t4); +// t6 = op(t2, t5) +// cond_branch <Loop> +// +// This can be converted to +// t00 = a[0]; +// t10 = f(t00); +// t20 = g(t10); +// Loop: +// t2 = t20; +// t3 = a[i+1]; +// t4 = f(t3); +// t5 = g(t4); +// t6 = op(t2, t5) +// t20 = t5 +// cond_branch <Loop> +// +// SROA does a good job of reusing a[i+1] as a[i] in the next iteration. +// Such a loop comes to this pass in the following form. +// +// LoopPreheader: +// X0 = a[0]; +// Loop: +// X2 = PHI<(X0, LoopPreheader), (X1, Loop)> +// t1 = f(X2) <-- I1 +// t2 = g(t1) +// ... 
+// X1 = a[i+1] +// t4 = f(X1) <-- I2 +// t5 = g(t4) +// t6 = op(t2, t5) +// cond_branch <Loop> +// +// In this pass, we look for PHIs such as X2 whose incoming values come only +// from the Loop Preheader and over the backedge and additionally, both these +// values are the results of the same operation in terms of opcode. We call such +// a PHI node a dependence chain or DepChain. In this case, the dependence of X2 +// over X1 is carried over only one iteration and so the DepChain is only one +// PHI node long. +// +// Then, we traverse the uses of the PHI (X2) and the uses of the value of the +// PHI coming over the backedge (X1). We stop at the first pair of such users +// I1 (of X2) and I2 (of X1) that meet the following conditions. +// 1. I1 and I2 are the same operation, but with different operands. +// 2. X2 and X1 are used at the same operand number in the two instructions. +// 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is +// a DepChain from Op1 to Op2 of the same length as that between X2 and X1. +// +// We then make the following transformation +// LoopPreheader: +// X0 = a[0]; +// Y0 = f(X0); +// Loop: +// X2 = PHI<(X0, LoopPreheader), (X1, Loop)> +// Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)> +// t1 = f(X2) <-- Will be removed by DCE. +// t2 = g(Y2) +// ... +// X1 = a[i+1] +// t4 = f(X1) +// t5 = g(t4) +// t6 = op(t2, t5) +// cond_branch <Loop> +// +// We proceed until we cannot find any more such instructions I1 and I2. +// +// --- DepChains & Loop carried dependences --- +// Consider a single basic block loop such as +// +// LoopPreheader: +// X0 = ... +// Y0 = ... +// Loop: +// X2 = PHI<(X0, LoopPreheader), (X1, Loop)> +// Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)> +// ... +// X1 = ... +// ... +// cond_branch <Loop> +// +// Then there is a dependence between X2 and X1 that goes back one iteration, +// i.e. X1 is used as X2 in the very next iteration. We represent this as a +// DepChain from X2 to X1 (X2->X1). 
+// Similarly, there is a dependence between Y2 and X1 that goes back two +// iterations. X1 is used as Y2 two iterations after it is computed. This is +// represented by a DepChain as (Y2->X2->X1). +// +// A DepChain has the following properties. +// 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of +// iterations of carried dependence + 1. +// 2. All instructions in the DepChain except the last are PHIs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <map> +#include <memory> +#include <set> + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-vlcr" + +STATISTIC(HexagonNumVectorLoopCarriedReuse, + "Number of values that were reused from a previous iteration."); + +static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", + cl::Hidden, + cl::desc("Maximum distance of loop carried dependences that are handled"), + cl::init(2), cl::ZeroOrMore); + +namespace llvm { + +void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&); +Pass *createHexagonVectorLoopCarriedReusePass(); + +} // end namespace llvm + +namespace { + + // See info about DepChain 
in the comments at the top of this file. + using ChainOfDependences = SmallVector<Instruction *, 4>; + + class DepChain { + ChainOfDependences Chain; + + public: + bool isIdentical(DepChain &Other) const { + if (Other.size() != size()) + return false; + ChainOfDependences &OtherChain = Other.getChain(); + for (int i = 0; i < size(); ++i) { + if (Chain[i] != OtherChain[i]) + return false; + } + return true; + } + + ChainOfDependences &getChain() { + return Chain; + } + + int size() const { + return Chain.size(); + } + + void clear() { + Chain.clear(); + } + + void push_back(Instruction *I) { + Chain.push_back(I); + } + + int iterations() const { + return size() - 1; + } + + Instruction *front() const { + return Chain.front(); + } + + Instruction *back() const { + return Chain.back(); + } + + Instruction *&operator[](const int index) { + return Chain[index]; + } + + friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); + }; + + LLVM_ATTRIBUTE_UNUSED + raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { + const ChainOfDependences &CD = D.Chain; + int ChainSize = CD.size(); + OS << "**DepChain Start::**\n"; + for (int i = 0; i < ChainSize -1; ++i) { + OS << *(CD[i]) << " -->\n"; + } + OS << *CD[ChainSize-1] << "\n"; + return OS; + } + + struct ReuseValue { + Instruction *Inst2Replace = nullptr; + + // In the new PHI node that we'll construct this is the value that'll be + // used over the backedge. This is teh value that gets reused from a + // previous iteration. 
+ Instruction *BackedgeInst = nullptr; + std::map<Instruction *, DepChain *> DepChains; + int Iterations = -1; + + ReuseValue() = default; + + void reset() { + Inst2Replace = nullptr; + BackedgeInst = nullptr; + DepChains.clear(); + Iterations = -1; + } + bool isDefined() { return Inst2Replace != nullptr; } + }; + + LLVM_ATTRIBUTE_UNUSED + raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { + OS << "** ReuseValue ***\n"; + OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; + OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n"; + return OS; + } + + class HexagonVectorLoopCarriedReuse : public LoopPass { + public: + static char ID; + + explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) { + PassRegistry *PR = PassRegistry::getPassRegistry(); + initializeHexagonVectorLoopCarriedReusePass(*PR); + } + + StringRef getPassName() const override { + return "Hexagon-specific loop carried reuse for HVX vectors"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LoopInfoWrapperPass>(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.setPreservesCFG(); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; + + private: + SetVector<DepChain *> Dependences; + std::set<Instruction *> ReplacedInsts; + Loop *CurLoop; + ReuseValue ReuseCandidate; + + bool doVLCR(); + void findLoopCarriedDeps(); + void findValueToReuse(); + void findDepChainFromPHI(Instruction *I, DepChain &D); + void reuseValue(); + Value *findValueInBlock(Value *Op, BasicBlock *BB); + DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters); + bool isEquivalentOperation(Instruction *I1, Instruction *I2); + bool canReplace(Instruction *I); + bool isCallInstCommutative(CallInst *C); + }; + +} // end anonymous namespace + +char HexagonVectorLoopCarriedReuse::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr", + "Hexagon-specific predictive commoning 
for HVX vectors", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) +INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr", + "Hexagon-specific predictive commoning for HVX vectors", false, false) + +bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) { + if (skipLoop(L)) + return false; + + if (!L->getLoopPreheader()) + return false; + + // Work only on innermost loops. + if (!L->getSubLoops().empty()) + return false; + + // Work only on single basic blocks loops. + if (L->getNumBlocks() != 1) + return false; + + CurLoop = L; + + return doVLCR(); +} + +bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) { + switch (C->getCalledFunction()->getIntrinsicID()) { + case Intrinsic::hexagon_V6_vaddb: + case Intrinsic::hexagon_V6_vaddb_128B: + case Intrinsic::hexagon_V6_vaddh: + case Intrinsic::hexagon_V6_vaddh_128B: + case Intrinsic::hexagon_V6_vaddw: + case Intrinsic::hexagon_V6_vaddw_128B: + case Intrinsic::hexagon_V6_vaddubh: + case Intrinsic::hexagon_V6_vaddubh_128B: + case Intrinsic::hexagon_V6_vadduhw: + case Intrinsic::hexagon_V6_vadduhw_128B: + case Intrinsic::hexagon_V6_vaddhw: + case Intrinsic::hexagon_V6_vaddhw_128B: + case Intrinsic::hexagon_V6_vmaxb: + case Intrinsic::hexagon_V6_vmaxb_128B: + case Intrinsic::hexagon_V6_vmaxh: + case Intrinsic::hexagon_V6_vmaxh_128B: + case Intrinsic::hexagon_V6_vmaxw: + case Intrinsic::hexagon_V6_vmaxw_128B: + case Intrinsic::hexagon_V6_vmaxub: + case Intrinsic::hexagon_V6_vmaxub_128B: + case Intrinsic::hexagon_V6_vmaxuh: + case Intrinsic::hexagon_V6_vmaxuh_128B: + case Intrinsic::hexagon_V6_vminub: + case Intrinsic::hexagon_V6_vminub_128B: + case Intrinsic::hexagon_V6_vminuh: + case Intrinsic::hexagon_V6_vminuh_128B: + case Intrinsic::hexagon_V6_vminb: + case Intrinsic::hexagon_V6_vminb_128B: + case Intrinsic::hexagon_V6_vminh: + case Intrinsic::hexagon_V6_vminh_128B: + 
case Intrinsic::hexagon_V6_vminw: + case Intrinsic::hexagon_V6_vminw_128B: + case Intrinsic::hexagon_V6_vmpyub: + case Intrinsic::hexagon_V6_vmpyub_128B: + case Intrinsic::hexagon_V6_vmpyuh: + case Intrinsic::hexagon_V6_vmpyuh_128B: + case Intrinsic::hexagon_V6_vavgub: + case Intrinsic::hexagon_V6_vavgub_128B: + case Intrinsic::hexagon_V6_vavgh: + case Intrinsic::hexagon_V6_vavgh_128B: + case Intrinsic::hexagon_V6_vavguh: + case Intrinsic::hexagon_V6_vavguh_128B: + case Intrinsic::hexagon_V6_vavgw: + case Intrinsic::hexagon_V6_vavgw_128B: + case Intrinsic::hexagon_V6_vavgb: + case Intrinsic::hexagon_V6_vavgb_128B: + case Intrinsic::hexagon_V6_vavguw: + case Intrinsic::hexagon_V6_vavguw_128B: + case Intrinsic::hexagon_V6_vabsdiffh: + case Intrinsic::hexagon_V6_vabsdiffh_128B: + case Intrinsic::hexagon_V6_vabsdiffub: + case Intrinsic::hexagon_V6_vabsdiffub_128B: + case Intrinsic::hexagon_V6_vabsdiffuh: + case Intrinsic::hexagon_V6_vabsdiffuh_128B: + case Intrinsic::hexagon_V6_vabsdiffw: + case Intrinsic::hexagon_V6_vabsdiffw_128B: + return true; + default: + return false; + } +} + +bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1, + Instruction *I2) { + if (!I1->isSameOperationAs(I2)) + return false; + // This check is in place specifically for intrinsics. isSameOperationAs will + // return two for any two hexagon intrinsics because they are essentially the + // same instruciton (CallInst). We need to scratch the surface to see if they + // are calls to the same function. + if (CallInst *C1 = dyn_cast<CallInst>(I1)) { + if (CallInst *C2 = dyn_cast<CallInst>(I2)) { + if (C1->getCalledFunction() != C2->getCalledFunction()) + return false; + } + } + + // If both the Instructions are of Vector Type and any of the element + // is integer constant, check their values too for equivalence. 
+ if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) { + unsigned NumOperands = I1->getNumOperands(); + for (unsigned i = 0; i < NumOperands; ++i) { + ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i)); + ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i)); + if(!C1) continue; + assert(C2); + if (C1->getSExtValue() != C2->getSExtValue()) + return false; + } + } + + return true; +} + +bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) { + const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); + if (!II) + return true; + + switch (II->getIntrinsicID()) { + case Intrinsic::hexagon_V6_hi: + case Intrinsic::hexagon_V6_lo: + case Intrinsic::hexagon_V6_hi_128B: + case Intrinsic::hexagon_V6_lo_128B: + LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n"); + return false; + default: + return true; + } +} +void HexagonVectorLoopCarriedReuse::findValueToReuse() { + for (auto *D : Dependences) { + LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n"); + if (D->iterations() > HexagonVLCRIterationLim) { + LLVM_DEBUG( + dbgs() + << ".. Skipping because number of iterations > than the limit\n"); + continue; + } + + PHINode *PN = cast<PHINode>(D->front()); + Instruction *BEInst = D->back(); + int Iters = D->iterations(); + BasicBlock *BB = PN->getParent(); + LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN + << " can be reused\n"); + + SmallVector<Instruction *, 4> PNUsers; + for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { + Use &U = *UI; + Instruction *User = cast<Instruction>(U.getUser()); + + if (User->getParent() != BB) + continue; + if (ReplacedInsts.count(User)) { + LLVM_DEBUG(dbgs() << *User + << " has already been replaced. 
Skipping...\n"); + continue; + } + if (isa<PHINode>(User)) + continue; + if (User->mayHaveSideEffects()) + continue; + if (!canReplace(User)) + continue; + + PNUsers.push_back(User); + } + LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n"); + + // For each interesting use I of PN, find an Instruction BEUser that + // performs the same operation as I on BEInst and whose other operands, + // if any, can also be rematerialized in OtherBB. We stop when we find the + // first such Instruction BEUser. This is because once BEUser is + // rematerialized in OtherBB, we may find more such "fixup" opportunities + // in this block. So, we'll start over again. + for (Instruction *I : PNUsers) { + for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E; + ++UI) { + Use &U = *UI; + Instruction *BEUser = cast<Instruction>(U.getUser()); + + if (BEUser->getParent() != BB) + continue; + if (!isEquivalentOperation(I, BEUser)) + continue; + + int NumOperands = I->getNumOperands(); + + // Take operands of each PNUser one by one and try to find DepChain + // with every operand of the BEUser. If any of the operands of BEUser + // has DepChain with current operand of the PNUser, break the matcher + // loop. Keep doing this for Every PNUser operand. If PNUser operand + // does not have DepChain with any of the BEUser operand, break the + // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate. + // This ensures that DepChain exist for all the PNUser operand with + // BEUser operand. This also ensures that DepChains are independent of + // the positions in PNUser and BEUser. 
+ std::map<Instruction *, DepChain *> DepChains; + CallInst *C1 = dyn_cast<CallInst>(I); + if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) { + bool Found = false; + for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { + Value *Op = I->getOperand(OpNo); + Instruction *OpInst = dyn_cast<Instruction>(Op); + Found = false; + for (int T = 0; T < NumOperands; ++T) { + Value *BEOp = BEUser->getOperand(T); + Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); + if (!OpInst && !BEOpInst) { + if (Op == BEOp) { + Found = true; + break; + } + } + + if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst)) + continue; + + DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters); + + if (D) { + Found = true; + DepChains[OpInst] = D; + break; + } + } + if (!Found) { + BEUser = nullptr; + break; + } + } + } else { + + for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { + Value *Op = I->getOperand(OpNo); + Value *BEOp = BEUser->getOperand(OpNo); + + Instruction *OpInst = dyn_cast<Instruction>(Op); + if (!OpInst) { + if (Op == BEOp) + continue; + // Do not allow reuse to occur when the operands may be different + // values. 
+ BEUser = nullptr; + break; + } + + Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); + DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters); + + if (D) { + DepChains[OpInst] = D; + } else { + BEUser = nullptr; + break; + } + } + } + if (BEUser) { + LLVM_DEBUG(dbgs() << "Found Value for reuse.\n"); + ReuseCandidate.Inst2Replace = I; + ReuseCandidate.BackedgeInst = BEUser; + ReuseCandidate.DepChains = DepChains; + ReuseCandidate.Iterations = Iters; + return; + } + ReuseCandidate.reset(); + } + } + } + ReuseCandidate.reset(); +} + +Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op, + BasicBlock *BB) { + PHINode *PN = dyn_cast<PHINode>(Op); + assert(PN); + Value *ValueInBlock = PN->getIncomingValueForBlock(BB); + return ValueInBlock; +} + +void HexagonVectorLoopCarriedReuse::reuseValue() { + LLVM_DEBUG(dbgs() << ReuseCandidate); + Instruction *Inst2Replace = ReuseCandidate.Inst2Replace; + Instruction *BEInst = ReuseCandidate.BackedgeInst; + int NumOperands = Inst2Replace->getNumOperands(); + std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains; + int Iterations = ReuseCandidate.Iterations; + BasicBlock *LoopPH = CurLoop->getLoopPreheader(); + assert(!DepChains.empty() && "No DepChains"); + LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n"); + + SmallVector<Instruction *, 4> InstsInPreheader; + for (int i = 0; i < Iterations; ++i) { + Instruction *InstInPreheader = Inst2Replace->clone(); + SmallVector<Value *, 4> Ops; + for (int j = 0; j < NumOperands; ++j) { + Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j)); + if (!I) + continue; + // Get the DepChain corresponding to this operand. + DepChain &D = *DepChains[I]; + // Get the PHI for the iteration number and find + // the incoming value from the Loop Preheader for + // that PHI. 
+ Value *ValInPreheader = findValueInBlock(D[i], LoopPH); + InstInPreheader->setOperand(j, ValInPreheader); + } + InstsInPreheader.push_back(InstInPreheader); + InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr"); + InstInPreheader->insertBefore(LoopPH->getTerminator()); + LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to " + << LoopPH->getName() << "\n"); + } + BasicBlock *BB = BEInst->getParent(); + IRBuilder<> IRB(BB); + IRB.SetInsertPoint(BB->getFirstNonPHI()); + Value *BEVal = BEInst; + PHINode *NewPhi; + for (int i = Iterations-1; i >=0 ; --i) { + Instruction *InstInPreheader = InstsInPreheader[i]; + NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2); + NewPhi->addIncoming(InstInPreheader, LoopPH); + NewPhi->addIncoming(BEVal, BB); + LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() + << "\n"); + BEVal = NewPhi; + } + // We are in LCSSA form. So, a value defined inside the Loop is used only + // inside the loop. So, the following is safe. + Inst2Replace->replaceAllUsesWith(NewPhi); + ReplacedInsts.insert(Inst2Replace); + ++HexagonNumVectorLoopCarriedReuse; +} + +bool HexagonVectorLoopCarriedReuse::doVLCR() { + assert(CurLoop->getSubLoops().empty() && + "Can do VLCR on the innermost loop only"); + assert((CurLoop->getNumBlocks() == 1) && + "Can do VLCR only on single block loops"); + + bool Changed = false; + bool Continue; + + LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n"); + do { + // Reset datastructures. 
+ Dependences.clear(); + Continue = false; + + findLoopCarriedDeps(); + findValueToReuse(); + if (ReuseCandidate.isDefined()) { + reuseValue(); + Changed = true; + Continue = true; + } + llvm::for_each(Dependences, std::default_delete<DepChain>()); + } while (Continue); + return Changed; +} + +void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, + DepChain &D) { + PHINode *PN = dyn_cast<PHINode>(I); + if (!PN) { + D.push_back(I); + return; + } else { + auto NumIncomingValues = PN->getNumIncomingValues(); + if (NumIncomingValues != 2) { + D.clear(); + return; + } + + BasicBlock *BB = PN->getParent(); + if (BB != CurLoop->getHeader()) { + D.clear(); + return; + } + + Value *BEVal = PN->getIncomingValueForBlock(BB); + Instruction *BEInst = dyn_cast<Instruction>(BEVal); + // This is a single block loop with a preheader, so at least + // one value should come over the backedge. + assert(BEInst && "There should be a value over the backedge"); + + Value *PreHdrVal = + PN->getIncomingValueForBlock(CurLoop->getLoopPreheader()); + if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) { + D.clear(); + return; + } + D.push_back(PN); + findDepChainFromPHI(BEInst, D); + } +} + +DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1, + Instruction *I2, + int Iters) { + for (auto *D : Dependences) { + if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters) + return D; + } + return nullptr; +} + +void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { + BasicBlock *BB = CurLoop->getHeader(); + for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { + auto *PN = cast<PHINode>(I); + if (!isa<VectorType>(PN->getType())) + continue; + + DepChain *D = new DepChain(); + findDepChainFromPHI(PN, *D); + if (D->size() != 0) + Dependences.insert(D); + else + delete D; + } + LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n"); + LLVM_DEBUG(for (size_t i = 0; i < Dependences.size(); + ++i) { dbgs() << 
*Dependences[i] << "\n"; }); +} + +Pass *llvm::createHexagonVectorLoopCarriedReusePass() { + return new HexagonVectorLoopCarriedReuse(); +} |
