diff options
Diffstat (limited to 'lib/Transforms/Scalar/EarlyCSE.cpp')
| -rw-r--r-- | lib/Transforms/Scalar/EarlyCSE.cpp | 168 | 
1 files changed, 126 insertions, 42 deletions
| diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index c5c9b2c185d6..5798e1c4ee99 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -13,9 +13,12 @@  //===----------------------------------------------------------------------===//  #include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/ADT/DenseMapInfo.h"  #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/ScopedHashTable.h"  #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/AssumptionCache.h"  #include "llvm/Analysis/GlobalsModRef.h" @@ -24,18 +27,37 @@  #include "llvm/Analysis/MemorySSAUpdater.h"  #include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h"  #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h"  #include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/RecyclingAllocator.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Scalar.h"  #include "llvm/Transforms/Utils/Local.h" +#include <cassert>  #include <deque> +#include <memory> +#include <utility> +  using namespace llvm;  using namespace llvm::PatternMatch; @@ -53,6 +75,7 @@ STATISTIC(NumDSE,      "Number of trivial dead stores removed");  //===----------------------------------------------------------------------===//  namespace { +  /// \brief Struct representing the available values in the scoped hash table.  struct SimpleValue {    Instruction *Inst; @@ -77,20 +100,25 @@ struct SimpleValue {             isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);    }  }; -} + +} // end anonymous namespace  namespace llvm { +  template <> struct DenseMapInfo<SimpleValue> {    static inline SimpleValue getEmptyKey() {      return DenseMapInfo<Instruction *>::getEmptyKey();    } +    static inline SimpleValue getTombstoneKey() {      return DenseMapInfo<Instruction *>::getTombstoneKey();    } +    static unsigned getHashValue(SimpleValue Val);    static bool isEqual(SimpleValue LHS, SimpleValue RHS);  }; -} + +} // end namespace llvm  unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {    Instruction *Inst = Val.Inst; @@ -115,6 +143,21 @@ unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {      return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);    } +  // Hash min/max/abs (cmp + select) to allow for commuted operands. +  // Min/max may also have non-canonical compare predicate (eg, the compare for +  // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the +  // compare. +  Value *A, *B; +  SelectPatternFlavor SPF = matchSelectPattern(Inst, A, B).Flavor; +  // TODO: We should also detect FP min/max. +  if (SPF == SPF_SMIN || SPF == SPF_SMAX || +      SPF == SPF_UMIN || SPF == SPF_UMAX || +      SPF == SPF_ABS || SPF == SPF_NABS) { +    if (A > B) +      std::swap(A, B); +    return hash_combine(Inst->getOpcode(), SPF, A, B); +  } +    if (CastInst *CI = dyn_cast<CastInst>(Inst))      return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0)); @@ -173,6 +216,20 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {             LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();    } +  // Min/max/abs can occur with commuted operands, non-canonical predicates, +  // and/or non-canonical operands. +  Value *LHSA, *LHSB; +  SelectPatternFlavor LSPF = matchSelectPattern(LHSI, LHSA, LHSB).Flavor; +  // TODO: We should also detect FP min/max. +  if (LSPF == SPF_SMIN || LSPF == SPF_SMAX || +      LSPF == SPF_UMIN || LSPF == SPF_UMAX || +      LSPF == SPF_ABS || LSPF == SPF_NABS) { +    Value *RHSA, *RHSB; +    SelectPatternFlavor RSPF = matchSelectPattern(RHSI, RHSA, RHSB).Flavor; +    return (LSPF == RSPF && ((LHSA == RHSA && LHSB == RHSB) || +                             (LHSA == RHSB && LHSB == RHSA))); +  } +    return false;  } @@ -181,6 +238,7 @@ bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {  //===----------------------------------------------------------------------===//  namespace { +  /// \brief Struct representing the available call values in the scoped hash  /// table.  struct CallValue { @@ -206,20 +264,25 @@ struct CallValue {      return true;    }  }; -} + +} // end anonymous namespace  namespace llvm { +  template <> struct DenseMapInfo<CallValue> {    static inline CallValue getEmptyKey() {      return DenseMapInfo<Instruction *>::getEmptyKey();    } +    static inline CallValue getTombstoneKey() {      return DenseMapInfo<Instruction *>::getTombstoneKey();    } +    static unsigned getHashValue(CallValue Val);    static bool isEqual(CallValue LHS, CallValue RHS);  }; -} + +} // end namespace llvm  unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {    Instruction *Inst = Val.Inst; @@ -241,6 +304,7 @@ bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {  //===----------------------------------------------------------------------===//  namespace { +  /// \brief A simple and fast domtree-based CSE pass.  ///  /// This pass does a simple depth-first walk over the dominator tree, @@ -257,10 +321,13 @@ public:    const SimplifyQuery SQ;    MemorySSA *MSSA;    std::unique_ptr<MemorySSAUpdater> MSSAUpdater; -  typedef RecyclingAllocator< -      BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy; -  typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>, -                          AllocatorTy> ScopedHTType; + +  using AllocatorTy = +      RecyclingAllocator<BumpPtrAllocator, +                         ScopedHashTableVal<SimpleValue, Value *>>; +  using ScopedHTType = +      ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>, +                      AllocatorTy>;    /// \brief A scoped hash table of the current values of all of our simple    /// scalar expressions. @@ -285,44 +352,45 @@ public:    /// present the table; it is the responsibility of the consumer to inspect    /// the atomicity/volatility if needed.    struct LoadValue { -    Instruction *DefInst; -    unsigned Generation; -    int MatchingId; -    bool IsAtomic; -    bool IsInvariant; -    LoadValue() -        : DefInst(nullptr), Generation(0), MatchingId(-1), IsAtomic(false), -          IsInvariant(false) {} +    Instruction *DefInst = nullptr; +    unsigned Generation = 0; +    int MatchingId = -1; +    bool IsAtomic = false; +    bool IsInvariant = false; + +    LoadValue() = default;      LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,                bool IsAtomic, bool IsInvariant)          : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),            IsAtomic(IsAtomic), IsInvariant(IsInvariant) {}    }; -  typedef RecyclingAllocator<BumpPtrAllocator, -                             ScopedHashTableVal<Value *, LoadValue>> -      LoadMapAllocator; -  typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>, -                          LoadMapAllocator> LoadHTType; + +  using LoadMapAllocator = +      RecyclingAllocator<BumpPtrAllocator, +                         ScopedHashTableVal<Value *, LoadValue>>; +  using LoadHTType = +      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>, +                      LoadMapAllocator>; +    LoadHTType AvailableLoads;    /// \brief A scoped hash table of the current values of read-only call    /// values.    ///    /// It uses the same generation count as loads. -  typedef ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>> -      CallHTType; +  using CallHTType = +      ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;    CallHTType AvailableCalls;    /// \brief This is the current generation of the memory value. -  unsigned CurrentGeneration; +  unsigned CurrentGeneration = 0;    /// \brief Set up the EarlyCSE runner for a particular function.    EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,             const TargetTransformInfo &TTI, DominatorTree &DT,             AssumptionCache &AC, MemorySSA *MSSA)        : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA), -        MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) { -  } +        MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}    bool run(); @@ -336,11 +404,10 @@ private:                CallHTType &AvailableCalls)          : Scope(AvailableValues), LoadScope(AvailableLoads),            CallScope(AvailableCalls) {} - -  private:      NodeScope(const NodeScope &) = delete; -    void operator=(const NodeScope &) = delete; +    NodeScope &operator=(const NodeScope &) = delete; +  private:      ScopedHTType::ScopeTy Scope;      LoadHTType::ScopeTy LoadScope;      CallHTType::ScopeTy CallScope; @@ -356,8 +423,10 @@ private:                CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n,                DomTreeNode::iterator child, DomTreeNode::iterator end)          : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child), -          EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls), -          Processed(false) {} +          EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls) +          {} +    StackNode(const StackNode &) = delete; +    StackNode &operator=(const StackNode &) = delete;      // Accessors.      unsigned currentGeneration() { return CurrentGeneration; } @@ -365,27 +434,25 @@ private:      void childGeneration(unsigned generation) { ChildGeneration = generation; }      DomTreeNode *node() { return Node; }      DomTreeNode::iterator childIter() { return ChildIter; } +      DomTreeNode *nextChild() {        DomTreeNode *child = *ChildIter;        ++ChildIter;        return child;      } +      DomTreeNode::iterator end() { return EndIter; }      bool isProcessed() { return Processed; }      void process() { Processed = true; }    private: -    StackNode(const StackNode &) = delete; -    void operator=(const StackNode &) = delete; - -    // Members.      unsigned CurrentGeneration;      unsigned ChildGeneration;      DomTreeNode *Node;      DomTreeNode::iterator ChildIter;      DomTreeNode::iterator EndIter;      NodeScope Scopes; -    bool Processed; +    bool Processed = false;    };    /// \brief Wrapper class to handle memory instructions, including loads, @@ -393,24 +460,28 @@ private:    class ParseMemoryInst {    public:      ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) -      : IsTargetMemInst(false), Inst(Inst) { +      : Inst(Inst) {        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))          if (TTI.getTgtMemIntrinsic(II, Info))            IsTargetMemInst = true;      } +      bool isLoad() const {        if (IsTargetMemInst) return Info.ReadMem;        return isa<LoadInst>(Inst);      } +      bool isStore() const {        if (IsTargetMemInst) return Info.WriteMem;        return isa<StoreInst>(Inst);      } +      bool isAtomic() const {        if (IsTargetMemInst)          return Info.Ordering != AtomicOrdering::NotAtomic;        return Inst->isAtomic();      } +      bool isUnordered() const {        if (IsTargetMemInst)          return Info.isUnordered(); @@ -447,6 +518,7 @@ private:        return (getPointerOperand() == Inst.getPointerOperand() &&                getMatchingId() == Inst.getMatchingId());      } +      bool isValid() const { return getPointerOperand() != nullptr; }      // For regular (non-intrinsic) loads/stores, this is set to -1. For @@ -457,6 +529,7 @@ private:        if (IsTargetMemInst) return Info.MatchingId;        return -1;      } +      Value *getPointerOperand() const {        if (IsTargetMemInst) return Info.PtrVal;        if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { @@ -466,17 +539,19 @@ private:        }        return nullptr;      } +      bool mayReadFromMemory() const {        if (IsTargetMemInst) return Info.ReadMem;        return Inst->mayReadFromMemory();      } +      bool mayWriteToMemory() const {        if (IsTargetMemInst) return Info.WriteMem;        return Inst->mayWriteToMemory();      }    private: -    bool IsTargetMemInst; +    bool IsTargetMemInst = false;      MemIntrinsicInfo Info;      Instruction *Inst;    }; @@ -524,8 +599,8 @@ private:          for (MemoryPhi *MP : PhisToCheck) {            MemoryAccess *FirstIn = MP->getIncomingValue(0); -          if (all_of(MP->incoming_values(), -                     [=](Use &In) { return In == FirstIn; })) +          if (llvm::all_of(MP->incoming_values(), +                           [=](Use &In) { return In == FirstIn; }))              WorkQueue.push_back(MP);          }          PhisToCheck.clear(); @@ -533,7 +608,8 @@ private:      }    }  }; -} + +} // end anonymous namespace  /// Determine if the memory referenced by LaterInst is from the same heap  /// version as EarlierInst. @@ -663,6 +739,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {        continue;      } +    // Skip sideeffect intrinsics, for the same reason as assume intrinsics. +    if (match(Inst, m_Intrinsic<Intrinsic::sideeffect>())) { +      DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << *Inst << '\n'); +      continue; +    } +      // Skip invariant.start intrinsics since they only read memory, and we can      // forward values across it. Also, we dont need to consume the last store      // since the semantics of invariant.start allow us to perform DSE of the @@ -1014,6 +1096,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,  }  namespace { +  /// \brief A simple and fast domtree-based CSE pass.  ///  /// This pass does a simple depth-first walk over the dominator tree, @@ -1062,7 +1145,8 @@ public:      AU.setPreservesCFG();    }  }; -} + +} // end anonymous namespace  using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>; | 
