summaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp748
1 files changed, 748 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
new file mode 100644
index 000000000000..e5df1d456c1e
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -0,0 +1,748 @@
+//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass removes the computation of provably redundant expressions that have
+// been computed earlier in a previous iteration. It relies on the use of PHIs
+// to identify loop carried dependences. This is scalar replacement for vector
+// types.
+//
+//-----------------------------------------------------------------------------
+// Motivation: Consider the case where we have the following loop structure.
+//
+// Loop:
+// t0 = a[i];
+// t1 = f(t0);
+// t2 = g(t1);
+// ...
+// t3 = a[i+1];
+// t4 = f(t3);
+// t5 = g(t4);
+// t6 = op(t2, t5)
+// cond_branch <Loop>
+//
+// This can be converted to
+// t00 = a[0];
+// t10 = f(t00);
+// t20 = g(t10);
+// Loop:
+// t2 = t20;
+// t3 = a[i+1];
+// t4 = f(t3);
+// t5 = g(t4);
+// t6 = op(t2, t5)
+// t20 = t5
+// cond_branch <Loop>
+//
+// SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
+// Such a loop comes to this pass in the following form.
+//
+// LoopPreheader:
+// X0 = a[0];
+// Loop:
+// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
+// t1 = f(X2) <-- I1
+// t2 = g(t1)
+// ...
+// X1 = a[i+1]
+// t4 = f(X1) <-- I2
+// t5 = g(t4)
+// t6 = op(t2, t5)
+// cond_branch <Loop>
+//
+// In this pass, we look for PHIs such as X2 whose incoming values come only
+// from the Loop Preheader and over the backedge and additionally, both these
+// values are the results of the same operation in terms of opcode. We call such
+// a PHI node a dependence chain or DepChain. In this case, the dependence of X2
+// over X1 is carried over only one iteration and so the DepChain is only one
+// PHI node long.
+//
+// Then, we traverse the uses of the PHI (X2) and the uses of the value of the
+// PHI coming over the backedge (X1). We stop at the first pair of such users
+// I1 (of X2) and I2 (of X1) that meet the following conditions.
+// 1. I1 and I2 are the same operation, but with different operands.
+// 2. X2 and X1 are used at the same operand number in the two instructions.
+// 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is a
+// DepChain from Op1 to Op2 of the same length as that between X2 and X1.
+//
+// We then make the following transformation
+// LoopPreheader:
+// X0 = a[0];
+// Y0 = f(X0);
+// Loop:
+// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
+// Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
+// t1 = f(X2) <-- Will be removed by DCE.
+// t2 = g(Y2)
+// ...
+// X1 = a[i+1]
+// t4 = f(X1)
+// t5 = g(t4)
+// t6 = op(t2, t5)
+// cond_branch <Loop>
+//
+// We proceed until we cannot find any more such instructions I1 and I2.
+//
+// --- DepChains & Loop carried dependences ---
+// Consider a single basic block loop such as
+//
+// LoopPreheader:
+// X0 = ...
+// Y0 = ...
+// Loop:
+// X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
+// Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
+// ...
+// X1 = ...
+// ...
+// cond_branch <Loop>
+//
+// Then there is a dependence between X2 and X1 that goes back one iteration,
+// i.e. X1 is used as X2 in the very next iteration. We represent this as a
+// DepChain from X2 to X1 (X2->X1).
+// Similarly, there is a dependence between Y2 and X1 that goes back two
+// iterations. X1 is used as Y2 two iterations after it is computed. This is
+// represented by a DepChain as (Y2->X2->X1).
+//
+// A DepChain has the following properties.
+// 1. Number of Instructions in DepChain = Num of edges in DepChain + 1 =
+// Number of iterations of carried dependence + 1.
+// 2. All instructions in the DepChain except the last are PHIs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <map>
+#include <memory>
+#include <set>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-vlcr"
+
+// Counter bumped once for every instruction that reuseValue() replaces with a
+// loop-carried PHI.
+STATISTIC(HexagonNumVectorLoopCarriedReuse,
+ "Number of values that were reused from a previous iteration.");
+
+// Upper bound on DepChain::iterations() for a dependence to be considered by
+// findValueToReuse(); longer chains are skipped. Defaults to 2.
+static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
+ cl::Hidden,
+ cl::desc("Maximum distance of loop carried dependences that are handled"),
+ cl::init(2), cl::ZeroOrMore);
+
+namespace llvm {
+
+// Declarations for the pass-registration hook (called from the pass
+// constructor) and the factory defined at the bottom of this file.
+void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
+Pass *createHexagonVectorLoopCarriedReusePass();
+
+} // end namespace llvm
+
+namespace {
+
+ // See info about DepChain in the comments at the top of this file.
+ using ChainOfDependences = SmallVector<Instruction *, 4>;
+
+ // Ordered list of instructions describing one loop-carried dependence. The
+ // class is a thin wrapper over ChainOfDependences that adds the notion of
+ // "iterations" (one less than the number of instructions in the chain).
+ class DepChain {
+   ChainOfDependences Chain;
+
+ public:
+   // True when both chains hold the same instructions in the same order.
+   bool isIdentical(DepChain &Other) const {
+     return Other.size() == size() &&
+            std::equal(Chain.begin(), Chain.end(), Other.getChain().begin());
+   }
+
+   // Direct, mutable access to the underlying container.
+   ChainOfDependences &getChain() { return Chain; }
+
+   // Number of instructions currently in the chain.
+   int size() const { return Chain.size(); }
+
+   void clear() { Chain.clear(); }
+
+   void push_back(Instruction *I) { Chain.push_back(I); }
+
+   // Distance, in loop iterations, spanned by this chain.
+   int iterations() const { return size() - 1; }
+
+   // First and last instruction of the chain; the chain must not be empty.
+   Instruction *front() const { return Chain.front(); }
+   Instruction *back() const { return Chain.back(); }
+
+   Instruction *&operator[](const int index) { return Chain[index]; }
+
+   friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
+ };
+
+ // Debug printer for a DepChain: every instruction but the last is followed
+ // by " -->", the last one by a plain newline. An empty chain prints only the
+ // header line instead of indexing out of bounds.
+ LLVM_ATTRIBUTE_UNUSED
+ raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
+   const ChainOfDependences &CD = D.Chain;
+   OS << "**DepChain Start::**\n";
+   if (CD.empty())
+     return OS;
+   for (size_t i = 0, e = CD.size() - 1; i < e; ++i) {
+     OS << *(CD[i]) << " -->\n";
+   }
+   OS << *CD.back() << "\n";
+   return OS;
+ }
+
+ // Describes one reuse opportunity found by findValueToReuse() and consumed
+ // by reuseValue().
+ struct ReuseValue {
+   // Instruction in the loop whose uses will be redirected to a new PHI.
+   Instruction *Inst2Replace = nullptr;
+
+   // In the new PHI node that we'll construct this is the value that'll be
+   // used over the backedge. This is the value that gets reused from a
+   // previous iteration.
+   Instruction *BackedgeInst = nullptr;
+
+   // Maps an Instruction operand of Inst2Replace to the DepChain that starts
+   // at that operand.
+   std::map<Instruction *, DepChain *> DepChains;
+
+   // Number of iterations the reuse spans; -1 while no candidate is recorded.
+   int Iterations = -1;
+
+   ReuseValue() = default;
+
+   // Return every field to its default (no candidate) state.
+   void reset() { *this = ReuseValue(); }
+
+   // A candidate is recorded exactly when Inst2Replace is set.
+   bool isDefined() { return Inst2Replace != nullptr; }
+ };
+
+ // Debug printer for a ReuseValue; both instruction pointers must be set.
+ LLVM_ATTRIBUTE_UNUSED
+ raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
+   return OS << "** ReuseValue ***\n"
+             << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"
+             << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
+ }
+
+ // Loop pass that replaces a vector computation with the value the same
+ // computation produced in an earlier iteration (see the file header).
+ class HexagonVectorLoopCarriedReuse : public LoopPass {
+ public:
+   static char ID;
+
+   explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
+     initializeHexagonVectorLoopCarriedReusePass(
+         *PassRegistry::getPassRegistry());
+   }
+
+   StringRef getPassName() const override {
+     return "Hexagon-specific loop carried reuse for HVX vectors";
+   }
+
+   // Requires LoopInfo, loop-simplify form and LCSSA; keeps LCSSA and the CFG
+   // intact.
+   void getAnalysisUsage(AnalysisUsage &AU) const override {
+     AU.addRequired<LoopInfoWrapperPass>();
+     AU.addRequiredID(LoopSimplifyID);
+     AU.addRequiredID(LCSSAID);
+     AU.addPreservedID(LCSSAID);
+     AU.setPreservesCFG();
+   }
+
+   bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+ private:
+   // DepChains found in the current loop; owned by this pass and freed at the
+   // end of every doVLCR() round.
+   SetVector<DepChain *> Dependences;
+   // Instructions whose uses were already redirected by reuseValue().
+   std::set<Instruction *> ReplacedInsts;
+   // Loop being processed; assigned in runOnLoop().
+   Loop *CurLoop;
+   // Result of the most recent findValueToReuse() call.
+   ReuseValue ReuseCandidate;
+
+   bool doVLCR();
+   void findLoopCarriedDeps();
+   void findValueToReuse();
+   void findDepChainFromPHI(Instruction *I, DepChain &D);
+   void reuseValue();
+   Value *findValueInBlock(Value *Op, BasicBlock *BB);
+   DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
+   bool isEquivalentOperation(Instruction *I1, Instruction *I2);
+   bool canReplace(Instruction *I);
+   bool isCallInstCommutative(CallInst *C);
+ };
+
+} // end anonymous namespace
+
+// Storage for the pass identifier that the constructor hands to LoopPass.
+char HexagonVectorLoopCarriedReuse::ID = 0;
+
+// Register the pass under the name "hexagon-vlcr" together with the passes it
+// lists as dependencies (LoopInfo, LoopSimplify, LCSSA).
+INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
+ "Hexagon-specific predictive commoning for HVX vectors", false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
+INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
+ "Hexagon-specific predictive commoning for HVX vectors", false, false)
+
+// Entry point for one loop. Returns true if the loop body was modified.
+bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
+  // The transformation is attempted only when the loop is not skipped, has a
+  // preheader, is innermost, and consists of a single basic block.
+  const bool Eligible = !skipLoop(L) && L->getLoopPreheader() &&
+                        L->getSubLoops().empty() && L->getNumBlocks() == 1;
+  if (!Eligible)
+    return false;
+
+  CurLoop = L;
+  return doVLCR();
+}
+
+// Returns true if \p C is a direct call to one of the HVX intrinsics listed
+// below, which this pass treats as commutative in their operands. Indirect
+// calls (no statically known callee) are never considered commutative.
+bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
+  // getCalledFunction() is null for indirect calls; do not dereference it.
+  const Function *Callee = C->getCalledFunction();
+  if (!Callee)
+    return false;
+
+  switch (Callee->getIntrinsicID()) {
+    case Intrinsic::hexagon_V6_vaddb:
+    case Intrinsic::hexagon_V6_vaddb_128B:
+    case Intrinsic::hexagon_V6_vaddh:
+    case Intrinsic::hexagon_V6_vaddh_128B:
+    case Intrinsic::hexagon_V6_vaddw:
+    case Intrinsic::hexagon_V6_vaddw_128B:
+    case Intrinsic::hexagon_V6_vaddubh:
+    case Intrinsic::hexagon_V6_vaddubh_128B:
+    case Intrinsic::hexagon_V6_vadduhw:
+    case Intrinsic::hexagon_V6_vadduhw_128B:
+    case Intrinsic::hexagon_V6_vaddhw:
+    case Intrinsic::hexagon_V6_vaddhw_128B:
+    case Intrinsic::hexagon_V6_vmaxb:
+    case Intrinsic::hexagon_V6_vmaxb_128B:
+    case Intrinsic::hexagon_V6_vmaxh:
+    case Intrinsic::hexagon_V6_vmaxh_128B:
+    case Intrinsic::hexagon_V6_vmaxw:
+    case Intrinsic::hexagon_V6_vmaxw_128B:
+    case Intrinsic::hexagon_V6_vmaxub:
+    case Intrinsic::hexagon_V6_vmaxub_128B:
+    case Intrinsic::hexagon_V6_vmaxuh:
+    case Intrinsic::hexagon_V6_vmaxuh_128B:
+    case Intrinsic::hexagon_V6_vminub:
+    case Intrinsic::hexagon_V6_vminub_128B:
+    case Intrinsic::hexagon_V6_vminuh:
+    case Intrinsic::hexagon_V6_vminuh_128B:
+    case Intrinsic::hexagon_V6_vminb:
+    case Intrinsic::hexagon_V6_vminb_128B:
+    case Intrinsic::hexagon_V6_vminh:
+    case Intrinsic::hexagon_V6_vminh_128B:
+    case Intrinsic::hexagon_V6_vminw:
+    case Intrinsic::hexagon_V6_vminw_128B:
+    case Intrinsic::hexagon_V6_vmpyub:
+    case Intrinsic::hexagon_V6_vmpyub_128B:
+    case Intrinsic::hexagon_V6_vmpyuh:
+    case Intrinsic::hexagon_V6_vmpyuh_128B:
+    case Intrinsic::hexagon_V6_vavgub:
+    case Intrinsic::hexagon_V6_vavgub_128B:
+    case Intrinsic::hexagon_V6_vavgh:
+    case Intrinsic::hexagon_V6_vavgh_128B:
+    case Intrinsic::hexagon_V6_vavguh:
+    case Intrinsic::hexagon_V6_vavguh_128B:
+    case Intrinsic::hexagon_V6_vavgw:
+    case Intrinsic::hexagon_V6_vavgw_128B:
+    case Intrinsic::hexagon_V6_vavgb:
+    case Intrinsic::hexagon_V6_vavgb_128B:
+    case Intrinsic::hexagon_V6_vavguw:
+    case Intrinsic::hexagon_V6_vavguw_128B:
+    case Intrinsic::hexagon_V6_vabsdiffh:
+    case Intrinsic::hexagon_V6_vabsdiffh_128B:
+    case Intrinsic::hexagon_V6_vabsdiffub:
+    case Intrinsic::hexagon_V6_vabsdiffub_128B:
+    case Intrinsic::hexagon_V6_vabsdiffuh:
+    case Intrinsic::hexagon_V6_vabsdiffuh_128B:
+    case Intrinsic::hexagon_V6_vabsdiffw:
+    case Intrinsic::hexagon_V6_vabsdiffw_128B:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Returns true if \p I1 and \p I2 perform the same operation: same opcode and
+// operand types, same callee when both are calls, and, for vector-typed
+// results, identical integer-constant operands at identical positions.
+bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
+                                                          Instruction *I2) {
+  if (!I1->isSameOperationAs(I2))
+    return false;
+  // This check is in place specifically for intrinsics. isSameOperationAs will
+  // return true for any two hexagon intrinsics because they are essentially the
+  // same instruction (CallInst). We need to scratch the surface to see if they
+  // are calls to the same function.
+  if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
+    if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
+      if (C1->getCalledFunction() != C2->getCalledFunction())
+        return false;
+    }
+  }
+
+  // If both the Instructions are of Vector Type and any of the element
+  // is integer constant, check their values too for equivalence.
+  if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
+    unsigned NumOperands = I1->getNumOperands();
+    for (unsigned i = 0; i < NumOperands; ++i) {
+      ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
+      ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
+      if (!C1)
+        continue;
+      // A constant in I1 paired with a non-constant in I2 cannot be the same
+      // operation. Report that instead of asserting, so builds without
+      // assertions do not dereference a null pointer.
+      if (!C2 || C1->getSExtValue() != C2->getSExtValue())
+        return false;
+    }
+  }
+
+  return true;
+}
+
+// Returns false for the V6_hi / V6_lo intrinsics (both vector lengths), which
+// are deliberately excluded from reuse; every other instruction is allowed.
+bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
+  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default:
+      break;
+    case Intrinsic::hexagon_V6_hi:
+    case Intrinsic::hexagon_V6_lo:
+    case Intrinsic::hexagon_V6_hi_128B:
+    case Intrinsic::hexagon_V6_lo_128B:
+      LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
+      return false;
+    }
+  }
+  return true;
+}
+// Scans the recorded DepChains for the first pair (I, BEUser) such that I uses
+// the PHI at the front of a chain, BEUser uses the value at the back of that
+// chain, both perform the same operation, and every operand of I is linked to
+// an operand of BEUser either by identity (non-instruction operands) or by a
+// DepChain of the same length. On success the pair is stored in
+// ReuseCandidate; otherwise ReuseCandidate is left reset.
+void HexagonVectorLoopCarriedReuse::findValueToReuse() {
+  for (auto *D : Dependences) {
+    LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
+    if (D->iterations() > HexagonVLCRIterationLim) {
+      LLVM_DEBUG(
+          dbgs()
+          << ".. Skipping because number of iterations > than the limit\n");
+      continue;
+    }
+
+    PHINode *PN = cast<PHINode>(D->front());
+    Instruction *BEInst = D->back();
+    int Iters = D->iterations();
+    BasicBlock *BB = PN->getParent();
+    LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
+                      << " can be reused\n");
+
+    // Collect the users of the PHI that are candidates for replacement.
+    SmallVector<Instruction *, 4> PNUsers;
+    for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
+      Use &U = *UI;
+      Instruction *User = cast<Instruction>(U.getUser());
+
+      if (User->getParent() != BB)
+        continue;
+      if (ReplacedInsts.count(User)) {
+        LLVM_DEBUG(dbgs() << *User
+                          << " has already been replaced. Skipping...\n");
+        continue;
+      }
+      if (isa<PHINode>(User))
+        continue;
+      if (User->mayHaveSideEffects())
+        continue;
+      if (!canReplace(User))
+        continue;
+
+      PNUsers.push_back(User);
+    }
+    LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
+
+    // For each interesting use I of PN, find an Instruction BEUser that
+    // performs the same operation as I on BEInst and whose other operands,
+    // if any, can also be rematerialized in OtherBB. We stop when we find the
+    // first such Instruction BEUser. This is because once BEUser is
+    // rematerialized in OtherBB, we may find more such "fixup" opportunities
+    // in this block. So, we'll start over again.
+    for (Instruction *I : PNUsers) {
+      for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
+           ++UI) {
+        Use &U = *UI;
+        Instruction *BEUser = cast<Instruction>(U.getUser());
+
+        if (BEUser->getParent() != BB)
+          continue;
+        if (!isEquivalentOperation(I, BEUser))
+          continue;
+
+        int NumOperands = I->getNumOperands();
+
+        // Take operands of each PNUser one by one and try to find DepChain
+        // with every operand of the BEUser. If any of the operands of BEUser
+        // has DepChain with current operand of the PNUser, break the matcher
+        // loop. Keep doing this for Every PNUser operand. If PNUser operand
+        // does not have DepChain with any of the BEUser operand, break the
+        // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
+        // This ensures that DepChain exist for all the PNUser operand with
+        // BEUser operand. This also ensures that DepChains are independent of
+        // the positions in PNUser and BEUser.
+        std::map<Instruction *, DepChain *> DepChains;
+        CallInst *C1 = dyn_cast<CallInst>(I);
+        if (I->isCommutative() || (C1 && isCallInstCommutative(C1))) {
+          // Each operand of BEUser may satisfy at most one operand of I.
+          // Without this, op(X2, X2) would be accepted as the previous
+          // iteration's op(X1, Y) because both X2 operands match X1.
+          SmallVector<bool, 4> BEOpMatched(NumOperands, false);
+          for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
+            Value *Op = I->getOperand(OpNo);
+            Instruction *OpInst = dyn_cast<Instruction>(Op);
+            bool Found = false;
+            for (int T = 0; T < NumOperands; ++T) {
+              if (BEOpMatched[T])
+                continue;
+
+              Value *BEOp = BEUser->getOperand(T);
+              Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
+              if (!OpInst && !BEOpInst) {
+                // Non-instruction operands must be the very same value.
+                if (Op != BEOp)
+                  continue;
+              } else if (!OpInst || !BEOpInst) {
+                // An instruction can never pair with a non-instruction.
+                continue;
+              } else {
+                DepChain *OpChain = getDepChainBtwn(OpInst, BEOpInst, Iters);
+                if (!OpChain)
+                  continue;
+                DepChains[OpInst] = OpChain;
+              }
+
+              BEOpMatched[T] = true;
+              Found = true;
+              break;
+            }
+            if (!Found) {
+              BEUser = nullptr;
+              break;
+            }
+          }
+        } else {
+
+          for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
+            Value *Op = I->getOperand(OpNo);
+            Value *BEOp = BEUser->getOperand(OpNo);
+
+            Instruction *OpInst = dyn_cast<Instruction>(Op);
+            if (!OpInst) {
+              if (Op == BEOp)
+                continue;
+              // Do not allow reuse to occur when the operands may be different
+              // values.
+              BEUser = nullptr;
+              break;
+            }
+
+            // BEOpInst may be null here; getDepChainBtwn then finds nothing
+            // and the candidate is rejected below.
+            Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
+            DepChain *OpChain = getDepChainBtwn(OpInst, BEOpInst, Iters);
+
+            if (OpChain) {
+              DepChains[OpInst] = OpChain;
+            } else {
+              BEUser = nullptr;
+              break;
+            }
+          }
+        }
+        if (BEUser) {
+          LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
+          ReuseCandidate.Inst2Replace = I;
+          ReuseCandidate.BackedgeInst = BEUser;
+          ReuseCandidate.DepChains = DepChains;
+          ReuseCandidate.Iterations = Iters;
+          return;
+        }
+        ReuseCandidate.reset();
+      }
+    }
+  }
+  ReuseCandidate.reset();
+}
+
+// \p Op must be a PHI node; returns the value it receives along the edge
+// coming from \p BB.
+Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
+                                                       BasicBlock *BB) {
+  auto *PN = dyn_cast<PHINode>(Op);
+  assert(PN);
+  return PN->getIncomingValueForBlock(BB);
+}
+
+// Applies the reuse described by ReuseCandidate:
+//  1. For each carried iteration, clone Inst2Replace into the loop preheader
+//     with its Instruction operands rewritten to the preheader values of the
+//     PHIs on the corresponding DepChains.
+//  2. Build a chain of new PHIs in the loop block whose preheader inputs are
+//     those clones and whose backedge input ultimately is BackedgeInst.
+//  3. Redirect all uses of Inst2Replace to the last PHI built.
+void HexagonVectorLoopCarriedReuse::reuseValue() {
+  LLVM_DEBUG(dbgs() << ReuseCandidate);
+  Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
+  Instruction *BEInst = ReuseCandidate.BackedgeInst;
+  int NumOperands = Inst2Replace->getNumOperands();
+  std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
+  int Iterations = ReuseCandidate.Iterations;
+  BasicBlock *LoopPH = CurLoop->getLoopPreheader();
+  assert(!DepChains.empty() && "No DepChains");
+  assert(Iterations > 0 && "Reuse must span at least one iteration");
+  LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
+
+  SmallVector<Instruction *, 4> InstsInPreheader;
+  for (int i = 0; i < Iterations; ++i) {
+    Instruction *InstInPreheader = Inst2Replace->clone();
+    for (int j = 0; j < NumOperands; ++j) {
+      Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
+      if (!I)
+        continue;
+      // Get the DepChain corresponding to this operand. Look it up without
+      // operator[] so a missing entry is not silently inserted as null.
+      auto ChainIt = DepChains.find(I);
+      assert(ChainIt != DepChains.end() && ChainIt->second &&
+             "Operand has no DepChain");
+      DepChain &D = *ChainIt->second;
+      // Get the PHI for the iteration number and find
+      // the incoming value from the Loop Preheader for
+      // that PHI.
+      Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
+      InstInPreheader->setOperand(j, ValInPreheader);
+    }
+    InstsInPreheader.push_back(InstInPreheader);
+    InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
+    InstInPreheader->insertBefore(LoopPH->getTerminator());
+    LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
+                      << LoopPH->getName() << "\n");
+  }
+  BasicBlock *BB = BEInst->getParent();
+  IRBuilder<> IRB(BB);
+  IRB.SetInsertPoint(BB->getFirstNonPHI());
+  Value *BEVal = BEInst;
+  PHINode *NewPhi = nullptr;
+  // Build the PHIs from the furthest iteration back to the nearest, each one
+  // feeding the next over the backedge.
+  for (int i = Iterations-1; i >=0 ; --i) {
+    Instruction *InstInPreheader = InstsInPreheader[i];
+    NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
+    NewPhi->addIncoming(InstInPreheader, LoopPH);
+    NewPhi->addIncoming(BEVal, BB);
+    LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
+                      << "\n");
+    BEVal = NewPhi;
+  }
+  assert(NewPhi && "No PHI was created for the reused value");
+  // We are in LCSSA form. So, a value defined inside the Loop is used only
+  // inside the loop. So, the following is safe.
+  Inst2Replace->replaceAllUsesWith(NewPhi);
+  ReplacedInsts.insert(Inst2Replace);
+  ++HexagonNumVectorLoopCarriedReuse;
+}
+
+// Driver: repeatedly recompute the DepChains of CurLoop, look for one reuse
+// opportunity and apply it, until a round finds nothing. Returns true if at
+// least one replacement was made.
+bool HexagonVectorLoopCarriedReuse::doVLCR() {
+  assert(CurLoop->getSubLoops().empty() &&
+         "Can do VLCR on the innermost loop only");
+  assert((CurLoop->getNumBlocks() == 1) &&
+         "Can do VLCR only on single block loops");
+
+  bool MadeChange = false;
+
+  LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
+  for (bool FoundReuse = true; FoundReuse;) {
+    // Start every round from a clean set of dependences.
+    Dependences.clear();
+
+    findLoopCarriedDeps();
+    findValueToReuse();
+
+    FoundReuse = ReuseCandidate.isDefined();
+    if (FoundReuse) {
+      reuseValue();
+      MadeChange = true;
+    }
+
+    // The chains were heap-allocated by findLoopCarriedDeps(); release them.
+    for (DepChain *Chain : Dependences)
+      delete Chain;
+  }
+  return MadeChange;
+}
+
+// Builds in \p D the dependence chain that starts at \p I: follow the backedge
+// value of each header PHI until a non-PHI instruction is reached, appending
+// every instruction visited. \p D is left empty when no usable chain exists:
+// a PHI that does not have exactly two incoming values, is not in the loop
+// header, has a non-instruction backedge or preheader value, or takes part in
+// a cycle of PHIs.
+void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
+                                                        DepChain &D) {
+  for (;;) {
+    PHINode *PN = dyn_cast<PHINode>(I);
+    if (!PN) {
+      // A non-PHI instruction terminates the chain.
+      D.push_back(I);
+      return;
+    }
+
+    if (PN->getNumIncomingValues() != 2) {
+      D.clear();
+      return;
+    }
+
+    BasicBlock *BB = PN->getParent();
+    if (BB != CurLoop->getHeader()) {
+      D.clear();
+      return;
+    }
+
+    // This is a single block loop with a preheader, so one value comes over
+    // the backedge. It need not be an instruction (it may be a constant, for
+    // example), in which case there is nothing to chain to.
+    Value *BEVal = PN->getIncomingValueForBlock(BB);
+    Instruction *BEInst = dyn_cast<Instruction>(BEVal);
+    if (!BEInst) {
+      D.clear();
+      return;
+    }
+
+    Value *PreHdrVal =
+        PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
+    if (!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
+      D.clear();
+      return;
+    }
+
+    // PHIs that feed each other over the backedge (e.g. a swap) never reach a
+    // non-PHI instruction; bail out instead of walking the cycle forever.
+    if (llvm::is_contained(D.getChain(), PN)) {
+      D.clear();
+      return;
+    }
+
+    D.push_back(PN);
+    I = BEInst;
+  }
+}
+
+// Returns the recorded DepChain that begins at \p I1, ends at \p I2 and spans
+// exactly \p Iters iterations, or nullptr when there is none.
+DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
+                                                         Instruction *I2,
+                                                         int Iters) {
+  for (DepChain *Candidate : Dependences) {
+    if (Candidate->front() != I1)
+      continue;
+    if (Candidate->back() != I2)
+      continue;
+    if (Candidate->iterations() != Iters)
+      continue;
+    return Candidate;
+  }
+  return nullptr;
+}
+
+// Populates Dependences with one heap-allocated DepChain for every
+// vector-typed PHI in the loop header that yields a non-empty chain.
+void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
+  for (PHINode &Phi : CurLoop->getHeader()->phis()) {
+    if (!isa<VectorType>(Phi.getType()))
+      continue;
+
+    std::unique_ptr<DepChain> Chain(new DepChain());
+    findDepChainFromPHI(&Phi, *Chain);
+    // Ownership moves to Dependences only for usable chains; an empty chain
+    // is freed when Chain goes out of scope.
+    if (Chain->size() != 0)
+      Dependences.insert(Chain.release());
+  }
+  LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
+  LLVM_DEBUG(for (DepChain *Chain : Dependences) {
+    dbgs() << *Chain << "\n";
+  });
+}
+
+// Factory for the pass; returns a newly allocated instance that the caller
+// receives as a raw Pass pointer.
+Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
+ return new HexagonVectorLoopCarriedReuse();
+}