diff options
Diffstat (limited to 'lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp')
| -rw-r--r-- | lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp | 636 | 
1 files changed, 636 insertions, 0 deletions
diff --git a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp new file mode 100644 index 000000000000..39395dbd3aec --- /dev/null +++ b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -0,0 +1,636 @@ +//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass removes the computation of provably redundant expressions that have +// been computed earlier in a previous iteration. It relies on the use of PHIs +// to identify loop carried dependences. This is scalar replacement for vector +// types. +// +//----------------------------------------------------------------------------- +// Motivation: Consider the case where we have the following loop structure. +// +// Loop: +//  t0 = a[i]; +//  t1 = f(t0); +//  t2 = g(t1); +//  ... +//  t3 = a[i+1]; +//  t4 = f(t3); +//  t5 = g(t4); +//  t6 = op(t2, t5) +//  cond_branch <Loop> +// +// This can be converted to +//  t00 = a[0]; +//  t10 = f(t00); +//  t20 = g(t10); +// Loop: +//  t2 = t20; +//  t3 = a[i+1]; +//  t4 = f(t3); +//  t5 = g(t4); +//  t6 = op(t2, t5) +//  t20 = t5 +//  cond_branch <Loop> +// +// SROA does a good job of reusing a[i+1] as a[i] in the next iteration. +// Such a loop comes to this pass in the following form. +// +// LoopPreheader: +//  X0 = a[0]; +// Loop: +//  X2 = PHI<(X0, LoopPreheader), (X1, Loop)> +//  t1 = f(X2)   <-- I1 +//  t2 = g(t1) +//  ... 
+//  X1 = a[i+1] +//  t4 = f(X1)   <-- I2 +//  t5 = g(t4) +//  t6 = op(t2, t5) +//  cond_branch <Loop> +// +// In this pass, we look for PHIs such as X2 whose incoming values come only +// from the Loop Preheader and over the backedge and additionally, both these +// values are the results of the same operation in terms of opcode. We call such +// a PHI node a dependence chain or DepChain. In this case, the dependence of X2 +// over X1 is carried over only one iteration and so the DepChain is only one +// PHI node long. +// +// Then, we traverse the uses of the PHI (X2) and the uses of the value of the +// PHI coming over the backedge (X1). We stop at the first pair of such users +// I1 (of X2) and I2 (of X1) that meet the following conditions. +// 1. I1 and I2 are the same operation, but with different operands. +// 2. X2 and X1 are used at the same operand number in the two instructions. +// 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is +//    a DepChain from Op1 to Op2 of the same length as that between X2 and X1. +// +// We then make the following transformation +// LoopPreheader: +//  X0 = a[0]; +//  Y0 = f(X0); +// Loop: +//  X2 = PHI<(X0, LoopPreheader), (X1, Loop)> +//  Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)> +//  t1 = f(X2)   <-- Will be removed by DCE. +//  t2 = g(Y2) +//  ... +//  X1 = a[i+1] +//  t4 = f(X1) +//  t5 = g(t4) +//  t6 = op(t2, t5) +//  cond_branch <Loop> +// +// We proceed until we cannot find any more such instructions I1 and I2. +// +// --- DepChains & Loop carried dependences --- +// Consider a single basic block loop such as +// +// LoopPreheader: +//  X0 = ... +//  Y0 = ... +// Loop: +//  X2 = PHI<(X0, LoopPreheader), (X1, Loop)> +//  Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)> +//  ... +//  X1 = ... +//  ... +//  cond_branch <Loop> +// +// Then there is a dependence between X2 and X1 that goes back one iteration, +// i.e. X1 is used as X2 in the very next iteration. 
We represent this as a +// DepChain from X2 to X1 (X2->X1). +// Similarly, there is a dependence between Y2 and X1 that goes back two +// iterations. X1 is used as Y2 two iterations after it is computed. This is +// represented by a DepChain as (Y2->X2->X1). +// +// A DepChain has the following properties. +// 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of +//    iterations of carried dependence + 1. +// 2. All instructions in the DepChain except the last are PHIs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <map> +#include <memory> +#include <set> + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-vlcr" + +STATISTIC(HexagonNumVectorLoopCarriedReuse, +          "Number of values that were reused from a previous iteration."); + +static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", +    cl::Hidden, +    cl::desc("Maximum distance of loop carried dependences that are handled"), +    cl::init(2), cl::ZeroOrMore); + +namespace llvm { + +void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&); +Pass *createHexagonVectorLoopCarriedReusePass(); + +} // end namespace llvm + 
+namespace { + +  // See info about DepChain in the comments at the top of this file. +  using ChainOfDependences = SmallVector<Instruction *, 4>; + +  class DepChain { +    ChainOfDependences Chain; + +  public: +    bool isIdentical(DepChain &Other) const { +      if (Other.size() != size()) +        return false; +      ChainOfDependences &OtherChain = Other.getChain(); +      for (int i = 0; i < size(); ++i) { +        if (Chain[i] != OtherChain[i]) +          return false; +      } +      return true; +    } + +    ChainOfDependences &getChain() { +      return Chain; +    } + +    int size() const { +      return Chain.size(); +    } + +    void clear() { +      Chain.clear(); +    } + +    void push_back(Instruction *I) { +      Chain.push_back(I); +    } + +    int iterations() const { +      return size() - 1; +    } + +    Instruction *front() const { +      return Chain.front(); +    } + +    Instruction *back() const { +      return Chain.back(); +    } + +    Instruction *&operator[](const int index) { +      return Chain[index]; +    } + +   friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); +  }; + +  LLVM_ATTRIBUTE_UNUSED +  raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) { +    const ChainOfDependences &CD = D.Chain; +    int ChainSize = CD.size(); +    OS << "**DepChain Start::**\n"; +    for (int i = 0; i < ChainSize -1; ++i) { +      OS << *(CD[i]) << " -->\n"; +    } +    OS << *CD[ChainSize-1] << "\n"; +    return OS; +  } + +  struct ReuseValue { +    Instruction *Inst2Replace = nullptr; + +    // In the new PHI node that we'll construct this is the value that'll be +    // used over the backedge. This is teh value that gets reused from a +    // previous iteration. 
+    Instruction *BackedgeInst = nullptr; + +    ReuseValue() = default; + +    void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; } +    bool isDefined() { return Inst2Replace != nullptr; } +  }; + +  LLVM_ATTRIBUTE_UNUSED +  raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { +    OS << "** ReuseValue ***\n"; +    OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n"; +    OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n"; +    return OS; +  } + +  class HexagonVectorLoopCarriedReuse : public LoopPass { +  public: +    static char ID; + +    explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) { +      PassRegistry *PR = PassRegistry::getPassRegistry(); +      initializeHexagonVectorLoopCarriedReusePass(*PR); +    } + +    StringRef getPassName() const override { +      return "Hexagon-specific loop carried reuse for HVX vectors"; +    } + +    void getAnalysisUsage(AnalysisUsage &AU) const override { +      AU.addRequired<LoopInfoWrapperPass>(); +      AU.addRequiredID(LoopSimplifyID); +      AU.addRequiredID(LCSSAID); +      AU.addPreservedID(LCSSAID); +      AU.setPreservesCFG(); +    } + +    bool runOnLoop(Loop *L, LPPassManager &LPM) override; + +  private: +    SetVector<DepChain *> Dependences; +    std::set<Instruction *> ReplacedInsts; +    Loop *CurLoop; +    ReuseValue ReuseCandidate; + +    bool doVLCR(); +    void findLoopCarriedDeps(); +    void findValueToReuse(); +    void findDepChainFromPHI(Instruction *I, DepChain &D); +    void reuseValue(); +    Value *findValueInBlock(Value *Op, BasicBlock *BB); +    bool isDepChainBtwn(Instruction *I1, Instruction *I2, int Iters); +    DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2); +    bool isEquivalentOperation(Instruction *I1, Instruction *I2); +    bool canReplace(Instruction *I); +  }; + +} // end anonymous namespace + +char HexagonVectorLoopCarriedReuse::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr", +    
"Hexagon-specific predictive commoning for HVX vectors", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) +INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr", +    "Hexagon-specific predictive commoning for HVX vectors", false, false) + +bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) { +  if (skipLoop(L)) +    return false; + +  if (!L->getLoopPreheader()) +    return false; + +  // Work only on innermost loops. +  if (!L->getSubLoops().empty()) +    return false; + +  // Work only on single basic blocks loops. +  if (L->getNumBlocks() != 1) +    return false; + +  CurLoop = L; + +  return doVLCR(); +} + +bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1, +                                                          Instruction *I2) { +  if (!I1->isSameOperationAs(I2)) +    return false; +  // This check is in place specifically for intrinsics. isSameOperationAs will +  // return two for any two hexagon intrinsics because they are essentially the +  // same instruciton (CallInst). We need to scratch the surface to see if they +  // are calls to the same function. +  if (CallInst *C1 = dyn_cast<CallInst>(I1)) { +    if (CallInst *C2 = dyn_cast<CallInst>(I2)) { +      if (C1->getCalledFunction() != C2->getCalledFunction()) +        return false; +    } +  } + +  // If both the Instructions are of Vector Type and any of the element +  // is integer constant, check their values too for equivalence. 
+  if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) { +    unsigned NumOperands = I1->getNumOperands(); +    for (unsigned i = 0; i < NumOperands; ++i) { +      ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i)); +      ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i)); +      if(!C1) continue; +      assert(C2); +      if (C1->getSExtValue() != C2->getSExtValue()) +        return false; +    } +  } + +  return true; +} + +bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) { +  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); +  if (II && +      (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi || +       II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) { +    DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n"); +    return false; +  } +  return true; +} +void HexagonVectorLoopCarriedReuse::findValueToReuse() { +  for (auto *D : Dependences) { +    DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n"); +    if (D->iterations() > HexagonVLCRIterationLim) { +      DEBUG(dbgs() << +            ".. Skipping because number of iterations > than the limit\n"); +      continue; +    } + +    PHINode *PN = cast<PHINode>(D->front()); +    Instruction *BEInst = D->back(); +    int Iters = D->iterations(); +    BasicBlock *BB = PN->getParent(); +    DEBUG(dbgs() << "Checking if any uses of " << *PN << " can be reused\n"); + +    SmallVector<Instruction *, 4> PNUsers; +    for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { +      Use &U = *UI; +      Instruction *User = cast<Instruction>(U.getUser()); + +      if (User->getParent() != BB) +        continue; +      if (ReplacedInsts.count(User)) { +        DEBUG(dbgs() << *User << " has already been replaced. 
Skipping...\n"); +        continue; +      } +      if (isa<PHINode>(User)) +        continue; +      if (User->mayHaveSideEffects()) +        continue; +      if (!canReplace(User)) +        continue; + +      PNUsers.push_back(User); +    } +    DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n"); + +    // For each interesting use I of PN, find an Instruction BEUser that +    // performs the same operation as I on BEInst and whose other operands, +    // if any, can also be rematerialized in OtherBB. We stop when we find the +    // first such Instruction BEUser. This is because once BEUser is +    // rematerialized in OtherBB, we may find more such "fixup" opportunities +    // in this block. So, we'll start over again. +    for (Instruction *I : PNUsers) { +      for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E; +           ++UI) { +        Use &U = *UI; +        Instruction *BEUser = cast<Instruction>(U.getUser()); + +        if (BEUser->getParent() != BB) +          continue; +        if (!isEquivalentOperation(I, BEUser)) +          continue; + +        int NumOperands = I->getNumOperands(); + +        for (int OpNo = 0; OpNo < NumOperands; ++OpNo) { +          Value *Op = I->getOperand(OpNo); +          Instruction *OpInst = dyn_cast<Instruction>(Op); +          if (!OpInst) +            continue; + +          Value *BEOp = BEUser->getOperand(OpNo); +          Instruction *BEOpInst = dyn_cast<Instruction>(BEOp); + +          if (!isDepChainBtwn(OpInst, BEOpInst, Iters)) { +            BEUser = nullptr; +            break; +          } +        } +        if (BEUser) { +          DEBUG(dbgs() << "Found Value for reuse.\n"); +          ReuseCandidate.Inst2Replace = I; +          ReuseCandidate.BackedgeInst = BEUser; +          return; +        } else +          ReuseCandidate.reset(); +      } +    } +  } +  ReuseCandidate.reset(); +} + +Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op, +                   
                                    BasicBlock *BB) { +  PHINode *PN = dyn_cast<PHINode>(Op); +  assert(PN); +  Value *ValueInBlock = PN->getIncomingValueForBlock(BB); +  return ValueInBlock; +} + +void HexagonVectorLoopCarriedReuse::reuseValue() { +  DEBUG(dbgs() << ReuseCandidate); +  Instruction *Inst2Replace = ReuseCandidate.Inst2Replace; +  Instruction *BEInst = ReuseCandidate.BackedgeInst; +  int NumOperands = Inst2Replace->getNumOperands(); +  std::map<Instruction *, DepChain *> DepChains; +  int Iterations = -1; +  BasicBlock *LoopPH = CurLoop->getLoopPreheader(); + +  for (int i = 0; i < NumOperands; ++i) { +    Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(i)); +    if(!I) +      continue; +    else { +      Instruction *J = cast<Instruction>(BEInst->getOperand(i)); +      DepChain *D = getDepChainBtwn(I, J); + +      assert(D && +             "No DepChain between corresponding operands in ReuseCandidate\n"); +      if (Iterations == -1) +        Iterations = D->iterations(); +      assert(Iterations == D->iterations() && "Iterations mismatch"); +      DepChains[I] = D; +    } +  } + +  DEBUG(dbgs() << "reuseValue is making the following changes\n"); + +  SmallVector<Instruction *, 4> InstsInPreheader; +  for (int i = 0; i < Iterations; ++i) { +    Instruction *InstInPreheader = Inst2Replace->clone(); +    SmallVector<Value *, 4> Ops; +    for (int j = 0; j < NumOperands; ++j) { +      Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j)); +      if (!I) +        continue; +      // Get the DepChain corresponding to this operand. +      DepChain &D = *DepChains[I]; +      // Get the PHI for the iteration number and find +      // the incoming value from the Loop Preheader for +      // that PHI. 
+      Value *ValInPreheader = findValueInBlock(D[i], LoopPH); +      InstInPreheader->setOperand(j, ValInPreheader); +    } +    InstsInPreheader.push_back(InstInPreheader); +    InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr"); +    InstInPreheader->insertBefore(LoopPH->getTerminator()); +    DEBUG(dbgs() << "Added " << *InstInPreheader << " to " << LoopPH->getName() +          << "\n"); +  } +  BasicBlock *BB = BEInst->getParent(); +  IRBuilder<> IRB(BB); +  IRB.SetInsertPoint(BB->getFirstNonPHI()); +  Value *BEVal = BEInst; +  PHINode *NewPhi; +  for (int i = Iterations-1; i >=0 ; --i) { +    Instruction *InstInPreheader = InstsInPreheader[i]; +    NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2); +    NewPhi->addIncoming(InstInPreheader, LoopPH); +    NewPhi->addIncoming(BEVal, BB); +    DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() << "\n"); +    BEVal = NewPhi; +  } +  // We are in LCSSA form. So, a value defined inside the Loop is used only +  // inside the loop. So, the following is safe. +  Inst2Replace->replaceAllUsesWith(NewPhi); +  ReplacedInsts.insert(Inst2Replace); +  ++HexagonNumVectorLoopCarriedReuse; +} + +bool HexagonVectorLoopCarriedReuse::doVLCR() { +  assert(CurLoop->getSubLoops().empty() && +         "Can do VLCR on the innermost loop only"); +  assert((CurLoop->getNumBlocks() == 1) && +         "Can do VLCR only on single block loops"); + +  bool Changed = false; +  bool Continue; + +  DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n"); +  do { +    // Reset datastructures. 
+    Dependences.clear(); +    Continue = false; + +    findLoopCarriedDeps(); +    findValueToReuse(); +    if (ReuseCandidate.isDefined()) { +      reuseValue(); +      Changed = true; +      Continue = true; +    } +    llvm::for_each(Dependences, std::default_delete<DepChain>()); +  } while (Continue); +  return Changed; +} + +void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, +                                                        DepChain &D) { +  PHINode *PN = dyn_cast<PHINode>(I); +  if (!PN) { +    D.push_back(I); +    return; +  } else { +    auto NumIncomingValues = PN->getNumIncomingValues(); +    if (NumIncomingValues != 2) { +      D.clear(); +      return; +    } + +    BasicBlock *BB = PN->getParent(); +    if (BB != CurLoop->getHeader()) { +      D.clear(); +      return; +    } + +    Value *BEVal = PN->getIncomingValueForBlock(BB); +    Instruction *BEInst = dyn_cast<Instruction>(BEVal); +    // This is a single block loop with a preheader, so at least +    // one value should come over the backedge. 
+    assert(BEInst && "There should be a value over the backedge"); + +    Value *PreHdrVal = +      PN->getIncomingValueForBlock(CurLoop->getLoopPreheader()); +    if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) { +      D.clear(); +      return; +    } +    D.push_back(PN); +    findDepChainFromPHI(BEInst, D); +  } +} + +bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1, +                                                      Instruction *I2, +                                                      int Iters) { +  for (auto *D : Dependences) { +    if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters) +      return true; +  } +  return false; +} + +DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1, +                                                            Instruction *I2) { +  for (auto *D : Dependences) { +    if (D->front() == I1 && D->back() == I2) +      return D; +  } +  return nullptr; +} + +void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { +  BasicBlock *BB = CurLoop->getHeader(); +  for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { +    auto *PN = cast<PHINode>(I); +    if (!isa<VectorType>(PN->getType())) +      continue; + +    DepChain *D = new DepChain(); +    findDepChainFromPHI(PN, *D); +    if (D->size() != 0) +      Dependences.insert(D); +    else +      delete D; +  } +  DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n"); +  DEBUG(for (size_t i = 0; i < Dependences.size(); ++i) { +      dbgs() << *Dependences[i] << "\n"; +    }); +} + +Pass *llvm::createHexagonVectorLoopCarriedReusePass() { +  return new HexagonVectorLoopCarriedReuse(); +}  | 
