Diffstat (limited to 'lib/Transforms/Scalar/SROA.cpp')
-rw-r--r-- | lib/Transforms/Scalar/SROA.cpp | 509
1 file changed, 291 insertions, 218 deletions
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index bfe3754f0769..6c3f012c6280 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -42,6 +42,8 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantFolder.h" @@ -79,7 +81,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> #include <cassert> @@ -124,14 +125,9 @@ static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices", static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), cl::Hidden); -/// Hidden option to allow more aggressive splitting. -static cl::opt<bool> -SROASplitNonWholeAllocaSlices("sroa-split-nonwhole-alloca-slices", - cl::init(false), cl::Hidden); - namespace { -/// \brief A custom IRBuilder inserter which prefixes all names, but only in +/// A custom IRBuilder inserter which prefixes all names, but only in /// Assert builds. class IRBuilderPrefixedInserter : public IRBuilderDefaultInserter { std::string Prefix; @@ -151,23 +147,23 @@ protected: } }; -/// \brief Provide a type for IRBuilder that drops names in release builds. +/// Provide a type for IRBuilder that drops names in release builds. using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>; -/// \brief A used slice of an alloca. +/// A used slice of an alloca. /// /// This structure represents a slice of an alloca used by some instruction. It /// stores both the begin and end offsets of this use, a pointer to the use /// itself, and a flag indicating whether we can classify the use as splittable /// or not when forming partitions of the alloca. class Slice { - /// \brief The beginning offset of the range. + /// The beginning offset of the range. uint64_t BeginOffset = 0; - /// \brief The ending offset, not included in the range. + /// The ending offset, not included in the range. uint64_t EndOffset = 0; - /// \brief Storage for both the use of this slice and whether it can be + /// Storage for both the use of this slice and whether it can be /// split. PointerIntPair<Use *, 1, bool> UseAndIsSplittable; @@ -189,7 +185,7 @@ public: bool isDead() const { return getUse() == nullptr; } void kill() { UseAndIsSplittable.setPointer(nullptr); } - /// \brief Support for ordering ranges. + /// Support for ordering ranges. /// /// This provides an ordering over ranges such that start offsets are /// always increasing, and within equal start offsets, the end offsets are @@ -207,7 +203,7 @@ public: return false; } - /// \brief Support comparison with a single offset to allow binary searches. + /// Support comparison with a single offset to allow binary searches. friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS, uint64_t RHSOffset) { return LHS.beginOffset() < RHSOffset; @@ -233,7 +229,7 @@ template <> struct isPodLike<Slice> { static const bool value = true; }; } // end namespace llvm -/// \brief Representation of the alloca slices. +/// Representation of the alloca slices. /// /// This class represents the slices of an alloca which are formed by its /// various uses. 
If a pointer escapes, we can't fully build a representation @@ -242,16 +238,16 @@ template <> struct isPodLike<Slice> { static const bool value = true; }; /// starting at a particular offset before splittable slices. class llvm::sroa::AllocaSlices { public: - /// \brief Construct the slices of a particular alloca. + /// Construct the slices of a particular alloca. AllocaSlices(const DataLayout &DL, AllocaInst &AI); - /// \brief Test whether a pointer to the allocation escapes our analysis. + /// Test whether a pointer to the allocation escapes our analysis. /// /// If this is true, the slices are never fully built and should be /// ignored. bool isEscaped() const { return PointerEscapingInstr; } - /// \brief Support for iterating over the slices. + /// Support for iterating over the slices. /// @{ using iterator = SmallVectorImpl<Slice>::iterator; using range = iterator_range<iterator>; @@ -266,10 +262,10 @@ public: const_iterator end() const { return Slices.end(); } /// @} - /// \brief Erase a range of slices. + /// Erase a range of slices. void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); } - /// \brief Insert new slices for this alloca. + /// Insert new slices for this alloca. /// /// This moves the slices into the alloca's slices collection, and re-sorts /// everything so that the usual ordering properties of the alloca's slices @@ -278,7 +274,7 @@ public: int OldSize = Slices.size(); Slices.append(NewSlices.begin(), NewSlices.end()); auto SliceI = Slices.begin() + OldSize; - std::sort(SliceI, Slices.end()); + llvm::sort(SliceI, Slices.end()); std::inplace_merge(Slices.begin(), SliceI, Slices.end()); } @@ -287,10 +283,10 @@ public: class partition_iterator; iterator_range<partition_iterator> partitions(); - /// \brief Access the dead users for this alloca. + /// Access the dead users for this alloca. ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; } - /// \brief Access the dead operands referring to this alloca. + /// Access the dead operands referring to this alloca. /// /// These are operands which have cannot actually be used to refer to the /// alloca as they are outside its range and the user doesn't correct for @@ -316,11 +312,11 @@ private: friend class AllocaSlices::SliceBuilder; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// \brief Handle to alloca instruction to simplify method interfaces. + /// Handle to alloca instruction to simplify method interfaces. AllocaInst &AI; #endif - /// \brief The instruction responsible for this alloca not having a known set + /// The instruction responsible for this alloca not having a known set /// of slices. /// /// When an instruction (potentially) escapes the pointer to the alloca, we @@ -328,7 +324,7 @@ private: /// alloca. This will be null if the alloca slices are analyzed successfully. Instruction *PointerEscapingInstr; - /// \brief The slices of the alloca. + /// The slices of the alloca. /// /// We store a vector of the slices formed by uses of the alloca here. This /// vector is sorted by increasing begin offset, and then the unsplittable @@ -336,7 +332,7 @@ private: /// details. SmallVector<Slice, 8> Slices; - /// \brief Instructions which will become dead if we rewrite the alloca. + /// Instructions which will become dead if we rewrite the alloca. /// /// Note that these are not separated by slice. This is because we expect an /// alloca to be completely rewritten or not rewritten at all. If rewritten, @@ -344,7 +340,7 @@ private: /// they come from outside of the allocated space. 
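The `AllocaSlices::insert` hunk above keeps the slice vector globally sorted without re-sorting everything: only the appended run is sorted, then merged into the already-sorted prefix. Below is a standalone sketch of that idiom using plain integers (`sortedAppend` is hypothetical, not LLVM code); `llvm::sort`, which replaces `std::sort` here, behaves the same but can shuffle its input first in expensive-checks builds to expose order-dependent comparators.

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Append New to an already-sorted vector and restore global order in
// O(k log k + n) rather than re-sorting all n + k elements: sort only
// the appended run, then merge the two sorted runs in place.
void sortedAppend(std::vector<int> &Sorted, const std::vector<int> &New) {
  std::ptrdiff_t OldSize = Sorted.size();
  Sorted.insert(Sorted.end(), New.begin(), New.end());
  auto Tail = Sorted.begin() + OldSize;
  std::sort(Tail, Sorted.end());                          // sort the new run
  std::inplace_merge(Sorted.begin(), Tail, Sorted.end()); // merge both runs
}
```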
SmallVector<Instruction *, 8> DeadUsers; - /// \brief Operands which will become dead if we rewrite the alloca. + /// Operands which will become dead if we rewrite the alloca. /// /// These are operands that in their particular use can be replaced with /// undef when we rewrite the alloca. These show up in out-of-bounds inputs @@ -355,7 +351,7 @@ private: SmallVector<Use *, 8> DeadOperands; }; -/// \brief A partition of the slices. +/// A partition of the slices. /// /// An ephemeral representation for a range of slices which can be viewed as /// a partition of the alloca. This range represents a span of the alloca's @@ -371,32 +367,32 @@ private: using iterator = AllocaSlices::iterator; - /// \brief The beginning and ending offsets of the alloca for this + /// The beginning and ending offsets of the alloca for this /// partition. uint64_t BeginOffset, EndOffset; - /// \brief The start and end iterators of this partition. + /// The start and end iterators of this partition. iterator SI, SJ; - /// \brief A collection of split slice tails overlapping the partition. + /// A collection of split slice tails overlapping the partition. SmallVector<Slice *, 4> SplitTails; - /// \brief Raw constructor builds an empty partition starting and ending at + /// Raw constructor builds an empty partition starting and ending at /// the given iterator. Partition(iterator SI) : SI(SI), SJ(SI) {} public: - /// \brief The start offset of this partition. + /// The start offset of this partition. /// /// All of the contained slices start at or after this offset. uint64_t beginOffset() const { return BeginOffset; } - /// \brief The end offset of this partition. + /// The end offset of this partition. /// /// All of the contained slices end at or before this offset. uint64_t endOffset() const { return EndOffset; } - /// \brief The size of the partition. + /// The size of the partition. /// /// Note that this can never be zero. uint64_t size() const { @@ -404,7 +400,7 @@ public: return EndOffset - BeginOffset; } - /// \brief Test whether this partition contains no slices, and merely spans + /// Test whether this partition contains no slices, and merely spans /// a region occupied by split slices. bool empty() const { return SI == SJ; } @@ -421,7 +417,7 @@ public: iterator end() const { return SJ; } /// @} - /// \brief Get the sequence of split slice tails. + /// Get the sequence of split slice tails. /// /// These tails are of slices which start before this partition but are /// split and overlap into the partition. We accumulate these while forming @@ -429,7 +425,7 @@ public: ArrayRef<Slice *> splitSliceTails() const { return SplitTails; } }; -/// \brief An iterator over partitions of the alloca's slices. +/// An iterator over partitions of the alloca's slices. /// /// This iterator implements the core algorithm for partitioning the alloca's /// slices. It is a forward iterator as we don't support backtracking for @@ -443,18 +439,18 @@ class AllocaSlices::partition_iterator Partition> { friend class AllocaSlices; - /// \brief Most of the state for walking the partitions is held in a class + /// Most of the state for walking the partitions is held in a class /// with a nice interface for examining them. Partition P; - /// \brief We need to keep the end of the slices to know when to stop. + /// We need to keep the end of the slices to know when to stop. AllocaSlices::iterator SE; - /// \brief We also need to keep track of the maximum split end offset seen. 
+ /// We also need to keep track of the maximum split end offset seen. /// FIXME: Do we really? uint64_t MaxSplitSliceEndOffset = 0; - /// \brief Sets the partition to be empty at given iterator, and sets the + /// Sets the partition to be empty at given iterator, and sets the /// end iterator. partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE) : P(SI), SE(SE) { @@ -464,7 +460,7 @@ class AllocaSlices::partition_iterator advance(); } - /// \brief Advance the iterator to the next partition. + /// Advance the iterator to the next partition. /// /// Requires that the iterator not be at the end of the slices. void advance() { @@ -619,7 +615,7 @@ public: Partition &operator*() { return P; } }; -/// \brief A forward range over the partitions of the alloca's slices. +/// A forward range over the partitions of the alloca's slices. /// /// This accesses an iterator range over the partitions of the alloca's /// slices. It computes these partitions on the fly based on the overlapping @@ -643,7 +639,7 @@ static Value *foldSelectInst(SelectInst &SI) { return nullptr; } -/// \brief A helper that folds a PHI node or a select. +/// A helper that folds a PHI node or a select. static Value *foldPHINodeOrSelectInst(Instruction &I) { if (PHINode *PN = dyn_cast<PHINode>(&I)) { // If PN merges together the same value, return that value. @@ -652,7 +648,7 @@ static Value *foldPHINodeOrSelectInst(Instruction &I) { return foldSelectInst(cast<SelectInst>(I)); } -/// \brief Builder for the alloca slices. +/// Builder for the alloca slices. /// /// This class builds a set of alloca slices by recursively visiting the uses /// of an alloca and making a slice for each load and store at each offset. @@ -668,7 +664,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> { SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap; SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes; - /// \brief Set to de-duplicate dead instructions found in the use walk. + /// Set to de-duplicate dead instructions found in the use walk. SmallPtrSet<Instruction *, 4> VisitedDeadInsts; public: @@ -687,11 +683,12 @@ private: // Completely skip uses which have a zero size or start either before or // past the end of the allocation. if (Size == 0 || Offset.uge(AllocSize)) { - DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset - << " which has zero size or starts outside of the " - << AllocSize << " byte alloca:\n" - << " alloca: " << AS.AI << "\n" - << " use: " << I << "\n"); + LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" + << Offset + << " which has zero size or starts outside of the " + << AllocSize << " byte alloca:\n" + << " alloca: " << AS.AI << "\n" + << " use: " << I << "\n"); return markAsDead(I); } @@ -706,10 +703,11 @@ private: // them, and so have to record at least the information here. assert(AllocSize >= BeginOffset); // Established above. 
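The `DEBUG(...)` to `LLVM_DEBUG(...)` renames that begin here repeat through the rest of the patch. The macro was renamed tree-wide because the short `DEBUG` name tended to collide with other projects' macros; behavior is unchanged: the body compiles away under `NDEBUG` and otherwise prints only under `-debug` or `-debug-only=<DEBUG_TYPE>`. A minimal usage sketch (the helper function is hypothetical, the macro is real):

```cpp
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "sroa"

// In a release (-DNDEBUG) build this expands to nothing; in an asserts
// build the message prints only under `opt -debug` or `opt -debug-only=sroa`.
static void reportClamp(unsigned Size, unsigned AllocSize) {
  LLVM_DEBUG(llvm::dbgs() << "clamping a " << Size << " byte use to a "
                          << AllocSize << " byte alloca\n");
}
```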
if (Size > AllocSize - BeginOffset) { - DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset - << " to remain within the " << AllocSize << " byte alloca:\n" - << " alloca: " << AS.AI << "\n" - << " use: " << I << "\n"); + LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" + << Offset << " to remain within the " << AllocSize + << " byte alloca:\n" + << " alloca: " << AS.AI << "\n" + << " use: " << I << "\n"); EndOffset = AllocSize; } @@ -802,18 +800,18 @@ private: uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply + // bounds of the allocation, it's behavior is undefined, so simply // ignore it. Note that this is more strict than the generic clamping // behavior of insertUse. We also try to handle cases which might run the // risk of overflow. // FIXME: We should instead consider the pointer to have escaped if this // function is being instrumented for addressing bugs or race conditions. if (Size > AllocSize || Offset.ugt(AllocSize - Size)) { - DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset - << " which extends past the end of the " << AllocSize - << " byte alloca:\n" - << " alloca: " << AS.AI << "\n" - << " use: " << SI << "\n"); + LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" + << Offset << " which extends past the end of the " + << AllocSize << " byte alloca:\n" + << " alloca: " << AS.AI << "\n" + << " use: " << SI << "\n"); return markAsDead(SI); } @@ -1027,7 +1025,7 @@ private: void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); } - /// \brief Disable SROA entirely if there are unhandled users of the alloca. + /// Disable SROA entirely if there are unhandled users of the alloca. void visitInstruction(Instruction &I) { PI.setAborted(&I); } }; @@ -1062,7 +1060,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) // Sort the uses. This arranges for the offsets to be in ascending order, // and the sizes to be in descending order. - std::sort(Slices.begin(), Slices.end()); + llvm::sort(Slices.begin(), Slices.end()); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1240,7 +1238,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) { } static void speculatePHINodeLoads(PHINode &PN) { - DEBUG(dbgs() << " original: " << PN << "\n"); + LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); Type *LoadTy = cast<PointerType>(PN.getType())->getElementType(); IRBuilderTy PHIBuilder(&PN); @@ -1263,10 +1261,21 @@ static void speculatePHINodeLoads(PHINode &PN) { } // Inject loads into all of the pred blocks. + DenseMap<BasicBlock*, Value*> InjectedLoads; for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { BasicBlock *Pred = PN.getIncomingBlock(Idx); - TerminatorInst *TI = Pred->getTerminator(); Value *InVal = PN.getIncomingValue(Idx); + + // A PHI node is allowed to have multiple (duplicated) entries for the same + // basic block, as long as the value is the same. So if we already injected + // a load in the predecessor, then we should reuse the same load for all + // duplicated entries. 
+ if (Value* V = InjectedLoads.lookup(Pred)) { + NewPN->addIncoming(V, Pred); + continue; + } + + TerminatorInst *TI = Pred->getTerminator(); IRBuilderTy PredBuilder(TI); LoadInst *Load = PredBuilder.CreateLoad( @@ -1276,9 +1285,10 @@ static void speculatePHINodeLoads(PHINode &PN) { if (AATags) Load->setAAMetadata(AATags); NewPN->addIncoming(Load, Pred); + InjectedLoads[Pred] = Load; } - DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); + LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); PN.eraseFromParent(); } @@ -1318,7 +1328,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI) { } static void speculateSelectInstLoads(SelectInst &SI) { - DEBUG(dbgs() << " original: " << SI << "\n"); + LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); IRBuilderTy IRB(&SI); Value *TV = SI.getTrueValue(); @@ -1349,14 +1359,14 @@ static void speculateSelectInstLoads(SelectInst &SI) { Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, LI->getName() + ".sroa.speculated"); - DEBUG(dbgs() << " speculated to: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n"); LI->replaceAllUsesWith(V); LI->eraseFromParent(); } SI.eraseFromParent(); } -/// \brief Build a GEP out of a base pointer and indices. +/// Build a GEP out of a base pointer and indices. /// /// This will return the BasePtr if that is valid, or build a new GEP /// instruction using the IRBuilder if GEP-ing is needed. @@ -1374,7 +1384,7 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, NamePrefix + "sroa_idx"); } -/// \brief Get a natural GEP off of the BasePtr walking through Ty toward +/// Get a natural GEP off of the BasePtr walking through Ty toward /// TargetTy without changing the offset of the pointer. /// /// This routine assumes we've already established a properly offset GEP with @@ -1423,7 +1433,7 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL, return buildGEP(IRB, BasePtr, Indices, NamePrefix); } -/// \brief Recursively compute indices for a natural GEP. +/// Recursively compute indices for a natural GEP. /// /// This is the recursive step for getNaturalGEPWithOffset that walks down the /// element types adding appropriate indices for the GEP. @@ -1491,7 +1501,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, Indices, NamePrefix); } -/// \brief Get a natural GEP from a base pointer to a particular offset and +/// Get a natural GEP from a base pointer to a particular offset and /// resulting in a particular type. /// /// The goal is to produce a "natural" looking GEP that works with the existing @@ -1526,7 +1536,7 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, Indices, NamePrefix); } -/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the +/// Compute an adjusted pointer from Ptr by Offset bytes where the /// resulting pointer has PointerTy. /// /// This tries very hard to compute a "natural" GEP which arrives at the offset @@ -1635,7 +1645,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, return Ptr; } -/// \brief Compute the adjusted alignment for a load or store from an offset. +/// Compute the adjusted alignment for a load or store from an offset. 
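The `InjectedLoads` map introduced above handles PHIs that list the same predecessor block more than once (for example, a `switch` whose cases branch to one successor); all entries for such a block must carry identical incoming values, so at most one load may be injected per block. A distilled sketch of the caching pattern, with a hypothetical `CreateLoad` callback standing in for the `IRBuilder` calls in `speculatePHINodeLoads`:

```cpp
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Inject at most one load per predecessor *block*: duplicated PHI entries
// for the same block must see the identical value, so later entries reuse
// the cached value instead of creating a second load.
static Value *getOrInjectLoad(DenseMap<BasicBlock *, Value *> &Injected,
                              BasicBlock *Pred,
                              function_ref<Value *()> CreateLoad) {
  if (Value *V = Injected.lookup(Pred)) // already injected for this block
    return V;
  Value *V = CreateLoad(); // builds the load before Pred's terminator
  Injected[Pred] = V;
  return V;
}
```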
static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset, const DataLayout &DL) { unsigned Alignment; @@ -1656,7 +1666,7 @@ static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset, return MinAlign(Alignment, Offset); } -/// \brief Test whether we can convert a value from the old to the new type. +/// Test whether we can convert a value from the old to the new type. /// /// This predicate should be used to guard calls to convertValue in order to /// ensure that we only try to convert viable values. The strategy is that we @@ -1707,7 +1717,7 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { return true; } -/// \brief Generic routine to convert an SSA value to a value of a different +/// Generic routine to convert an SSA value to a value of a different /// type. /// /// This will try various different casting techniques, such as bitcasts, @@ -1759,7 +1769,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, return IRB.CreateBitCast(V, NewTy); } -/// \brief Test whether the given slice use can be promoted to a vector. +/// Test whether the given slice use can be promoted to a vector. /// /// This function is called to test each entry in a partition which is slated /// for a single slice. @@ -1830,7 +1840,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, return true; } -/// \brief Test whether the given alloca partitioning and range of slices can be +/// Test whether the given alloca partitioning and range of slices can be /// promoted to a vector. /// /// This is a quick test to check whether we can rewrite a particular alloca @@ -1896,7 +1906,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { "All non-integer types eliminated!"); return RHSTy->getNumElements() < LHSTy->getNumElements(); }; - std::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes); + llvm::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes); CandidateTys.erase( std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes), CandidateTys.end()); @@ -1943,7 +1953,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { return nullptr; } -/// \brief Test whether a slice of an alloca is valid for integer widening. +/// Test whether a slice of an alloca is valid for integer widening. /// /// This implements the necessary checking for the \c isIntegerWideningViable /// test below on a single slice of the alloca. @@ -1970,6 +1980,10 @@ static bool isIntegerWideningViableForSlice(const Slice &S, // We can't handle loads that extend past the allocated memory. if (DL.getTypeStoreSize(LI->getType()) > Size) return false; + // So far, AllocaSliceRewriter does not support widening split slice tails + // in rewriteIntegerLoad. + if (S.beginOffset() < AllocBeginOffset) + return false; // Note that we don't count vector loads or stores as whole-alloca // operations which enable integer widening because we would prefer to use // vector widening instead. @@ -1991,6 +2005,10 @@ static bool isIntegerWideningViableForSlice(const Slice &S, // We can't handle stores that extend past the allocated memory. if (DL.getTypeStoreSize(ValueTy) > Size) return false; + // So far, AllocaSliceRewriter does not support widening split slice tails + // in rewriteIntegerStore. 
+ if (S.beginOffset() < AllocBeginOffset) + return false; // Note that we don't count vector loads or stores as whole-alloca // operations which enable integer widening because we would prefer to use // vector widening instead. @@ -2021,7 +2039,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, return true; } -/// \brief Test whether the given alloca partition's integer operations can be +/// Test whether the given alloca partition's integer operations can be /// widened to promotable ones. /// /// This is a quick test to check whether we can rewrite the integer loads and @@ -2072,7 +2090,7 @@ static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name) { - DEBUG(dbgs() << " start: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " start: " << *V << "\n"); IntegerType *IntTy = cast<IntegerType>(V->getType()); assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && "Element extends past full value"); @@ -2081,13 +2099,13 @@ static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); if (ShAmt) { V = IRB.CreateLShr(V, ShAmt, Name + ".shift"); - DEBUG(dbgs() << " shifted: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n"); } assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot extract to a larger integer!"); if (Ty != IntTy) { V = IRB.CreateTrunc(V, Ty, Name + ".trunc"); - DEBUG(dbgs() << " trunced: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n"); } return V; } @@ -2098,10 +2116,10 @@ static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, IntegerType *Ty = cast<IntegerType>(V->getType()); assert(Ty->getBitWidth() <= IntTy->getBitWidth() && "Cannot insert a larger integer!"); - DEBUG(dbgs() << " start: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " start: " << *V << "\n"); if (Ty != IntTy) { V = IRB.CreateZExt(V, IntTy, Name + ".ext"); - DEBUG(dbgs() << " extended: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " extended: " << *V << "\n"); } assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) && "Element store outside of alloca store"); @@ -2110,15 +2128,15 @@ static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset); if (ShAmt) { V = IRB.CreateShl(V, ShAmt, Name + ".shift"); - DEBUG(dbgs() << " shifted: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n"); } if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) { APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt); Old = IRB.CreateAnd(Old, Mask, Name + ".mask"); - DEBUG(dbgs() << " masked: " << *Old << "\n"); + LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n"); V = IRB.CreateOr(Old, V, Name + ".insert"); - DEBUG(dbgs() << " inserted: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n"); } return V; } @@ -2135,7 +2153,7 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, if (NumElements == 1) { V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), Name + ".extract"); - DEBUG(dbgs() << " extract: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " extract: " << *V << "\n"); return V; } @@ -2145,7 +2163,7 @@ static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, Mask.push_back(IRB.getInt32(i)); V = IRB.CreateShuffleVector(V, 
UndefValue::get(V->getType()), ConstantVector::get(Mask), Name + ".extract"); - DEBUG(dbgs() << " shuffle: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n"); return V; } @@ -2159,7 +2177,7 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, // Single element to insert. V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex), Name + ".insert"); - DEBUG(dbgs() << " insert: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " insert: " << *V << "\n"); return V; } @@ -2184,7 +2202,7 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, Mask.push_back(UndefValue::get(IRB.getInt32Ty())); V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), ConstantVector::get(Mask), Name + ".expand"); - DEBUG(dbgs() << " shuffle: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n"); Mask.clear(); for (unsigned i = 0; i != VecTy->getNumElements(); ++i) @@ -2192,11 +2210,11 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend"); - DEBUG(dbgs() << " blend: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " blend: " << *V << "\n"); return V; } -/// \brief Visitor to rewrite instructions using p particular slice of an alloca +/// Visitor to rewrite instructions using p particular slice of an alloca /// to use a new alloca. /// /// Also implements the rewriting to vector-based accesses when the partition @@ -2295,9 +2313,9 @@ public: IsSplittable = I->isSplittable(); IsSplit = BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset; - DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : "")); - DEBUG(AS.printSlice(dbgs(), I, "")); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : "")); + LLVM_DEBUG(AS.printSlice(dbgs(), I, "")); + LLVM_DEBUG(dbgs() << "\n"); // Compute the intersecting offset range. assert(BeginOffset < NewAllocaEndOffset); @@ -2327,7 +2345,7 @@ private: // Every instruction which can end up as a user must have a rewrite rule. bool visitInstruction(Instruction &I) { - DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n"); + LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n"); llvm_unreachable("No rewrite rule for this instruction!"); } @@ -2369,7 +2387,7 @@ private: ); } - /// \brief Compute suitable alignment to access this slice of the *new* + /// Compute suitable alignment to access this slice of the *new* /// alloca. /// /// You can optionally pass a type to this routine and if that type's ABI @@ -2431,10 +2449,13 @@ private: } bool visitLoadInst(LoadInst &LI) { - DEBUG(dbgs() << " original: " << LI << "\n"); + LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); Value *OldOp = LI.getOperand(0); assert(OldOp == OldPtr); + AAMDNodes AATags; + LI.getAAMetadata(AATags); + unsigned AS = LI.getPointerAddressSpace(); Type *TargetTy = IsSplit ? 
Type::getIntNTy(LI.getContext(), SliceSize * 8) @@ -2453,6 +2474,8 @@ private: TargetTy->isIntegerTy()))) { LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(), LI.getName()); + if (AATags) + NewLI->setAAMetadata(AATags); if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); @@ -2488,6 +2511,8 @@ private: LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(TargetTy), LI.isVolatile(), LI.getName()); + if (AATags) + NewLI->setAAMetadata(AATags); if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); @@ -2524,11 +2549,12 @@ private: Pass.DeadInsts.insert(&LI); deleteIfTriviallyDead(OldOp); - DEBUG(dbgs() << " to: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " to: " << *V << "\n"); return !LI.isVolatile() && !IsPtrAdjusted; } - bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) { + bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp, + AAMDNodes AATags) { if (V->getType() != VecTy) { unsigned BeginIndex = getIndex(NewBeginOffset); unsigned EndIndex = getIndex(NewEndOffset); @@ -2546,14 +2572,15 @@ private: V = insertVector(IRB, Old, V, BeginIndex, "vec"); } StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); + if (AATags) + Store->setAAMetadata(AATags); Pass.DeadInsts.insert(&SI); - (void)Store; - DEBUG(dbgs() << " to: " << *Store << "\n"); + LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return true; } - bool rewriteIntegerStore(Value *V, StoreInst &SI) { + bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) { assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { @@ -2567,16 +2594,21 @@ private: V = convertValue(DL, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access); + if (AATags) + Store->setAAMetadata(AATags); Pass.DeadInsts.insert(&SI); - DEBUG(dbgs() << " to: " << *Store << "\n"); + LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return true; } bool visitStoreInst(StoreInst &SI) { - DEBUG(dbgs() << " original: " << SI << "\n"); + LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); Value *OldOp = SI.getOperand(1); assert(OldOp == OldPtr); + AAMDNodes AATags; + SI.getAAMetadata(AATags); + Value *V = SI.getValueOperand(); // Strip all inbounds GEPs and pointer casts to try to dig out any root @@ -2598,9 +2630,9 @@ private: } if (VecTy) - return rewriteVectorizedStoreInst(V, SI, OldOp); + return rewriteVectorizedStoreInst(V, SI, OldOp, AATags); if (IntTy && V->getType()->isIntegerTy()) - return rewriteIntegerStore(V, SI); + return rewriteIntegerStore(V, SI, AATags); const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize; StoreInst *NewSI; @@ -2631,16 +2663,18 @@ private: SI.isVolatile()); } NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access); + if (AATags) + NewSI->setAAMetadata(AATags); if (SI.isVolatile()) NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); - DEBUG(dbgs() << " to: " << *NewSI << "\n"); + LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n"); return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); } - /// \brief Compute an integer value from splatting an i8 across the given + /// Compute an integer value from splatting an i8 across the given /// number of bytes. 
/// /// Note that this routine assumes an i8 is a byte. If that isn't true, don't @@ -2667,25 +2701,27 @@ private: return V; } - /// \brief Compute a vector splat for a given element value. + /// Compute a vector splat for a given element value. Value *getVectorSplat(Value *V, unsigned NumElements) { V = IRB.CreateVectorSplat(NumElements, V, "vsplat"); - DEBUG(dbgs() << " splat: " << *V << "\n"); + LLVM_DEBUG(dbgs() << " splat: " << *V << "\n"); return V; } bool visitMemSetInst(MemSetInst &II) { - DEBUG(dbgs() << " original: " << II << "\n"); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); assert(II.getRawDest() == OldPtr); + AAMDNodes AATags; + II.getAAMetadata(AATags); + // If the memset has a variable size, it cannot be split, just adjust the // pointer to the new alloca. if (!isa<Constant>(II.getLength())) { assert(!IsSplit); assert(NewBeginOffset == BeginOffset); II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType())); - Type *CstTy = II.getAlignmentCst()->getType(); - II.setAlignment(ConstantInt::get(CstTy, getSliceAlign())); + II.setDestAlignment(getSliceAlign()); deleteIfTriviallyDead(OldPtr); return false; @@ -2710,8 +2746,9 @@ private: CallInst *New = IRB.CreateMemSet( getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size, getSliceAlign(), II.isVolatile()); - (void)New; - DEBUG(dbgs() << " to: " << *New << "\n"); + if (AATags) + New->setAAMetadata(AATags); + LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return false; } @@ -2773,10 +2810,11 @@ private: V = convertValue(DL, IRB, V, AllocaTy); } - Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), - II.isVolatile()); - (void)New; - DEBUG(dbgs() << " to: " << *New << "\n"); + StoreInst *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + II.isVolatile()); + if (AATags) + New->setAAMetadata(AATags); + LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return !II.isVolatile(); } @@ -2784,7 +2822,10 @@ private: // Rewriting of memory transfer instructions can be a bit tricky. We break // them into two categories: split intrinsics and unsplit intrinsics. - DEBUG(dbgs() << " original: " << II << "\n"); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + + AAMDNodes AATags; + II.getAAMetadata(AATags); bool IsDest = &II.getRawDestUse() == OldUse; assert((IsDest && II.getRawDest() == OldPtr) || @@ -2801,18 +2842,16 @@ private: // update both source and dest of a single call. if (!IsSplittable) { Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); - if (IsDest) + if (IsDest) { II.setDest(AdjustedPtr); - else + II.setDestAlignment(SliceAlign); + } + else { II.setSource(AdjustedPtr); - - if (II.getAlignment() > SliceAlign) { - Type *CstTy = II.getAlignmentCst()->getType(); - II.setAlignment( - ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign))); + II.setSourceAlignment(SliceAlign); } - DEBUG(dbgs() << " to: " << II << "\n"); + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); deleteIfTriviallyDead(OldPtr); return false; } @@ -2862,8 +2901,10 @@ private: // Compute the relative offset for the other pointer within the transfer. unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS); APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset); - unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1, - OtherOffset.zextOrTrunc(64).getZExtValue()); + unsigned OtherAlign = + IsDest ? II.getSourceAlignment() : II.getDestAlignment(); + OtherAlign = MinAlign(OtherAlign ? 
OtherAlign : 1, + OtherOffset.zextOrTrunc(64).getZExtValue()); if (EmitMemCpy) { // Compute the other pointer, folding as much as possible to produce @@ -2875,11 +2916,25 @@ private: Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); - CallInst *New = IRB.CreateMemCpy( - IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size, - MinAlign(SliceAlign, OtherAlign), II.isVolatile()); - (void)New; - DEBUG(dbgs() << " to: " << *New << "\n"); + Value *DestPtr, *SrcPtr; + unsigned DestAlign, SrcAlign; + // Note: IsDest is true iff we're copying into the new alloca slice + if (IsDest) { + DestPtr = OurPtr; + DestAlign = SliceAlign; + SrcPtr = OtherPtr; + SrcAlign = OtherAlign; + } else { + DestPtr = OtherPtr; + DestAlign = OtherAlign; + SrcPtr = OurPtr; + SrcAlign = SliceAlign; + } + CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign, + Size, II.isVolatile()); + if (AATags) + New->setAAMetadata(AATags); + LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return false; } @@ -2927,8 +2982,11 @@ private: uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract"); } else { - Src = - IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(), "copyload"); + LoadInst *Load = IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(), + "copyload"); + if (AATags) + Load->setAAMetadata(AATags); + Src = Load; } if (VecTy && !IsWholeAlloca && IsDest) { @@ -2946,15 +3004,16 @@ private: StoreInst *Store = cast<StoreInst>( IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); - (void)Store; - DEBUG(dbgs() << " to: " << *Store << "\n"); + if (AATags) + Store->setAAMetadata(AATags); + LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); return !II.isVolatile(); } bool visitIntrinsicInst(IntrinsicInst &II) { assert(II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end); - DEBUG(dbgs() << " original: " << II << "\n"); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); assert(II.getArgOperand(1) == OldPtr); // Record this instruction for deletion. @@ -2982,13 +3041,13 @@ private: New = IRB.CreateLifetimeEnd(Ptr, Size); (void)New; - DEBUG(dbgs() << " to: " << *New << "\n"); + LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); return true; } bool visitPHINode(PHINode &PN) { - DEBUG(dbgs() << " original: " << PN << "\n"); + LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable"); assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable"); @@ -3007,7 +3066,7 @@ private: // Replace the operands which were using the old pointer. std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr); - DEBUG(dbgs() << " to: " << PN << "\n"); + LLVM_DEBUG(dbgs() << " to: " << PN << "\n"); deleteIfTriviallyDead(OldPtr); // PHIs can't be promoted on their own, but often can be speculated. 
We @@ -3018,7 +3077,7 @@ private: } bool visitSelectInst(SelectInst &SI) { - DEBUG(dbgs() << " original: " << SI << "\n"); + LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) && "Pointer isn't an operand!"); assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable"); @@ -3031,7 +3090,7 @@ private: if (SI.getOperand(2) == OldPtr) SI.setOperand(2, NewPtr); - DEBUG(dbgs() << " to: " << SI << "\n"); + LLVM_DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldPtr); // Selects can't be promoted on their own, but often can be speculated. We @@ -3044,7 +3103,7 @@ private: namespace { -/// \brief Visitor to rewrite aggregate loads and stores as scalar. +/// Visitor to rewrite aggregate loads and stores as scalar. /// /// This pass aggressively rewrites all aggregate loads and stores on /// a particular pointer (or any pointer derived from it which we can identify) @@ -3067,7 +3126,7 @@ public: /// Rewrite loads and stores through a pointer and all pointers derived from /// it. bool rewrite(Instruction &I) { - DEBUG(dbgs() << " Rewriting FCA loads and stores...\n"); + LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n"); enqueueUsers(I); bool Changed = false; while (!Queue.empty()) { @@ -3089,7 +3148,7 @@ private: // Conservative default is to not rewrite anything. bool visitInstruction(Instruction &I) { return false; } - /// \brief Generic recursive split emission class. + /// Generic recursive split emission class. template <typename Derived> class OpSplitter { protected: /// The builder used to form new instructions. @@ -3113,7 +3172,7 @@ private: : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {} public: - /// \brief Generic recursive split emission routine. + /// Generic recursive split emission routine. /// /// This method recursively splits an aggregate op (load or store) into /// scalar or vector ops. It splits recursively until it hits a single value @@ -3165,8 +3224,10 @@ private: }; struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> { - LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr) - : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {} + AAMDNodes AATags; + + LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags) + : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {} /// Emit a leaf load of a single value. This is called at the leaves of the /// recursive emission to actually load values. @@ -3175,9 +3236,11 @@ private: // Load the single value and insert it using the indices. Value *GEP = IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"); - Value *Load = IRB.CreateLoad(GEP, Name + ".load"); + LoadInst *Load = IRB.CreateLoad(GEP, Name + ".load"); + if (AATags) + Load->setAAMetadata(AATags); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); - DEBUG(dbgs() << " to: " << *Load << "\n"); + LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); } }; @@ -3187,8 +3250,10 @@ private: return false; // We have an aggregate being loaded, split it apart. 
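The memory-intrinsic hunks above (`setDestAlignment`, `setSourceAlignment`, and the reworked `CreateMemCpy` call) follow the IR-level change that gave memcpy/memmove a separate alignment for each pointer operand instead of one shared alignment argument, so the rewriter no longer folds both sides to a single `MinAlign`. A sketch of the new call shape, with placeholder names:

```cpp
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// With per-operand alignments, each pointer keeps its own guarantee;
// previously a single alignment had to understate the stronger side.
static CallInst *emitCopy(IRBuilder<> &IRB, Value *DstPtr, unsigned DstAlign,
                          Value *SrcPtr, unsigned SrcAlign, Value *Size,
                          bool IsVolatile) {
  return IRB.CreateMemCpy(DstPtr, DstAlign, SrcPtr, SrcAlign, Size,
                          IsVolatile);
}
```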
- DEBUG(dbgs() << " original: " << LI << "\n"); - LoadOpSplitter Splitter(&LI, *U); + LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); + AAMDNodes AATags; + LI.getAAMetadata(AATags); + LoadOpSplitter Splitter(&LI, *U, AATags); Value *V = UndefValue::get(LI.getType()); Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); LI.replaceAllUsesWith(V); @@ -3197,8 +3262,9 @@ private: } struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> { - StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr) - : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {} + StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags) + : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {} + AAMDNodes AATags; /// Emit a leaf store of a single value. This is called at the leaves of the /// recursive emission to actually produce stores. @@ -3212,9 +3278,10 @@ private: IRB.CreateExtractValue(Agg, Indices, Name + ".extract"); Value *InBoundsGEP = IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"); - Value *Store = IRB.CreateStore(ExtractValue, InBoundsGEP); - (void)Store; - DEBUG(dbgs() << " to: " << *Store << "\n"); + StoreInst *Store = IRB.CreateStore(ExtractValue, InBoundsGEP); + if (AATags) + Store->setAAMetadata(AATags); + LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); } }; @@ -3226,8 +3293,10 @@ private: return false; // We have an aggregate being stored, split it apart. - DEBUG(dbgs() << " original: " << SI << "\n"); - StoreOpSplitter Splitter(&SI, *U); + LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); + AAMDNodes AATags; + SI.getAAMetadata(AATags); + StoreOpSplitter Splitter(&SI, *U, AATags); Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca"); SI.eraseFromParent(); return true; @@ -3256,7 +3325,7 @@ private: } // end anonymous namespace -/// \brief Strip aggregate type wrapping. +/// Strip aggregate type wrapping. /// /// This removes no-op aggregate types wrapping an underlying type. It will /// strip as many layers of types as it can without changing either the type @@ -3286,7 +3355,7 @@ static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) { return stripAggregateTypeWrapping(DL, InnerTy); } -/// \brief Try to find a partition of the aggregate type passed in for a given +/// Try to find a partition of the aggregate type passed in for a given /// offset and size. /// /// This recurses through the aggregate type and tries to compute a subtype @@ -3392,7 +3461,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, return SubTy; } -/// \brief Pre-split loads and stores to simplify rewriting. +/// Pre-split loads and stores to simplify rewriting. /// /// We want to break up the splittable load+store pairs as much as /// possible. This is important to do as a preprocessing step, as once we @@ -3423,7 +3492,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, /// /// \returns true if any changes are made. bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { - DEBUG(dbgs() << "Pre-splitting loads and stores\n"); + LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n"); // Track the loads and stores which are candidates for pre-splitting here, in // the order they first appear during the partition scan. These give stable @@ -3455,7 +3524,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // maybe it would make it more principled? 
SmallPtrSet<LoadInst *, 8> UnsplittableLoads; - DEBUG(dbgs() << " Searching for candidate loads and stores\n"); + LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n"); for (auto &P : AS.partitions()) { for (Slice &S : P) { Instruction *I = cast<Instruction>(S.getUse()->getUser()); @@ -3510,7 +3579,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { } // Record the initial split. - DEBUG(dbgs() << " Candidate: " << *I << "\n"); + LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n"); auto &Offsets = SplitOffsetsMap[I]; assert(Offsets.Splits.empty() && "Should not have splits the first time we see an instruction!"); @@ -3570,10 +3639,11 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { if (LoadOffsets.Splits == StoreOffsets.Splits) return false; - DEBUG(dbgs() - << " Mismatched splits for load and store:\n" - << " " << *LI << "\n" - << " " << *SI << "\n"); + LLVM_DEBUG( + dbgs() + << " Mismatched splits for load and store:\n" + << " " << *LI << "\n" + << " " << *SI << "\n"); // We've found a store and load that we need to split // with mismatched relative splits. Just give up on them @@ -3646,7 +3716,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand()); IRB.SetInsertPoint(LI); - DEBUG(dbgs() << " Splitting load: " << *LI << "\n"); + LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n"); uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); int Idx = 0, Size = Offsets.Splits.size(); @@ -3656,7 +3726,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { auto *PartPtrTy = PartTy->getPointerTo(AS); LoadInst *PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, BasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), PartPtrTy, BasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); @@ -3671,9 +3741,9 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, &PLoad->getOperandUse(PLoad->getPointerOperandIndex()), /*IsSplittable*/ false)); - DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() - << ", " << NewSlices.back().endOffset() << "): " << *PLoad - << "\n"); + LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() + << ", " << NewSlices.back().endOffset() + << "): " << *PLoad << "\n"); // See if we've handled all the splits. 
if (Idx >= Size) @@ -3693,14 +3763,15 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { StoreInst *SI = cast<StoreInst>(LU); if (!Stores.empty() && SplitOffsetsMap.count(SI)) { DeferredStores = true; - DEBUG(dbgs() << " Deferred splitting of store: " << *SI << "\n"); + LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI + << "\n"); continue; } Value *StoreBasePtr = SI->getPointerOperand(); IRB.SetInsertPoint(SI); - DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n"); + LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n"); for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) { LoadInst *PLoad = SplitLoads[Idx]; @@ -3712,11 +3783,11 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), PartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access); - DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); + LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); } // We want to immediately iterate on any allocas impacted by splitting @@ -3765,7 +3836,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { Value *LoadBasePtr = LI->getPointerOperand(); Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand()); - DEBUG(dbgs() << " Splitting store: " << *SI << "\n"); + LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n"); // Check whether we have an already split load. auto SplitLoadsMapI = SplitLoadsMap.find(LI); @@ -3775,7 +3846,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { assert(SplitLoads->size() == Offsets.Splits.size() + 1 && "Too few split loads for the number of splits in the store!"); } else { - DEBUG(dbgs() << " of load: " << *LI << "\n"); + LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n"); } uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); @@ -3794,7 +3865,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { auto AS = LI->getPointerAddressSpace(); PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false, LI->getName()); @@ -3806,7 +3877,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, - APInt(DL.getPointerSizeInBits(AS), PartOffset), + APInt(DL.getIndexSizeInBits(AS), PartOffset), StorePartPtrTy, StoreBasePtr->getName() + "."), getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false); @@ -3815,11 +3886,11 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, &PStore->getOperandUse(PStore->getPointerOperandIndex()), /*IsSplittable*/ false)); - DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() - << ", " << NewSlices.back().endOffset() << "): " << *PStore - << "\n"); + LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() + << ", " << NewSlices.back().endOffset() + << "): " << *PStore << "\n"); if (!SplitLoads) { - DEBUG(dbgs() << " of split 
load: " << *PLoad << "\n"); + LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n"); } // See if we've finished all the splits. @@ -3874,10 +3945,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // sequence. AS.insert(NewSlices); - DEBUG(dbgs() << " Pre-split slices:\n"); + LLVM_DEBUG(dbgs() << " Pre-split slices:\n"); #ifndef NDEBUG for (auto I = AS.begin(), E = AS.end(); I != E; ++I) - DEBUG(AS.print(dbgs(), I, " ")); + LLVM_DEBUG(AS.print(dbgs(), I, " ")); #endif // Finally, don't try to promote any allocas that new require re-splitting. @@ -3891,7 +3962,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { return true; } -/// \brief Rewrite an alloca partition's users. +/// Rewrite an alloca partition's users. /// /// This routine drives both of the rewriting goals of the SROA pass. It tries /// to rewrite uses of an alloca partition to be conducive for SSA value @@ -3934,10 +4005,10 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, // exact same type as the original, and with the same access offsets. In that // case, re-use the existing alloca, but still run through the rewriter to // perform phi and select speculation. + // P.beginOffset() can be non-zero even with the same type in a case with + // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll). AllocaInst *NewAI; - if (SliceTy == AI.getAllocatedType()) { - assert(P.beginOffset() == 0 && - "Non-zero begin offset but same alloca type"); + if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) { NewAI = &AI; // FIXME: We should be able to bail at this point with "nothing changed". // FIXME: We might want to defer PHI speculation until after here. @@ -3958,12 +4029,14 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, NewAI = new AllocaInst( SliceTy, AI.getType()->getAddressSpace(), nullptr, Alignment, AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI); + // Copy the old AI debug location over to the new one. + NewAI->setDebugLoc(AI.getDebugLoc()); ++NumNewAllocas; } - DEBUG(dbgs() << "Rewriting alloca partition " - << "[" << P.beginOffset() << "," << P.endOffset() - << ") to: " << *NewAI << "\n"); + LLVM_DEBUG(dbgs() << "Rewriting alloca partition " + << "[" << P.beginOffset() << "," << P.endOffset() + << ") to: " << *NewAI << "\n"); // Track the high watermark on the worklist as it is only relevant for // promoted allocas. We will reset it to this point if the alloca is not in @@ -4040,7 +4113,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, return NewAI; } -/// \brief Walks the slices of an alloca and form partitions based on them, +/// Walks the slices of an alloca and form partitions based on them, /// rewriting each of their uses. bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { if (AS.begin() == AS.end()) @@ -4063,7 +4136,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { uint64_t AllocaSize = DL.getTypeAllocSize(AI.getAllocatedType()); const uint64_t MaxBitVectorSize = 1024; - if (SROASplitNonWholeAllocaSlices && AllocaSize <= MaxBitVectorSize) { + if (AllocaSize <= MaxBitVectorSize) { // If a byte boundary is included in any load or store, a slice starting or // ending at the boundary is not splittable. 
SmallBitVector SplittableOffset(AllocaSize + 1, true); @@ -4106,7 +4179,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { } if (!IsSorted) - std::sort(AS.begin(), AS.end()); + llvm::sort(AS.begin(), AS.end()); /// Describes the allocas introduced by rewritePartition in order to migrate /// the debug info. @@ -4201,7 +4274,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { return Changed; } -/// \brief Clobber a use with undef, deleting the used value if it becomes dead. +/// Clobber a use with undef, deleting the used value if it becomes dead. void SROA::clobberUse(Use &U) { Value *OldV = U; // Replace the use with an undef value. @@ -4216,13 +4289,13 @@ void SROA::clobberUse(Use &U) { } } -/// \brief Analyze an alloca for SROA. +/// Analyze an alloca for SROA. /// /// This analyzes the alloca to ensure we can reason about it, builds /// the slices of the alloca, and then hands it off to be split and /// rewritten as needed. bool SROA::runOnAlloca(AllocaInst &AI) { - DEBUG(dbgs() << "SROA alloca: " << AI << "\n"); + LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n"); ++NumAllocasAnalyzed; // Special case dead allocas, as they're trivial. @@ -4246,7 +4319,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { // Build the slices using a recursive instruction-visiting builder. AllocaSlices AS(DL, AI); - DEBUG(AS.print(dbgs())); + LLVM_DEBUG(AS.print(dbgs())); if (AS.isEscaped()) return Changed; @@ -4274,18 +4347,18 @@ bool SROA::runOnAlloca(AllocaInst &AI) { Changed |= splitAlloca(AI, AS); - DEBUG(dbgs() << " Speculating PHIs\n"); + LLVM_DEBUG(dbgs() << " Speculating PHIs\n"); while (!SpeculatablePHIs.empty()) speculatePHINodeLoads(*SpeculatablePHIs.pop_back_val()); - DEBUG(dbgs() << " Speculating Selects\n"); + LLVM_DEBUG(dbgs() << " Speculating Selects\n"); while (!SpeculatableSelects.empty()) speculateSelectInstLoads(*SpeculatableSelects.pop_back_val()); return Changed; } -/// \brief Delete the dead instructions accumulated in this run. +/// Delete the dead instructions accumulated in this run. /// /// Recursively deletes the dead instructions we've accumulated. This is done /// at the very end to maximize locality of the recursive delete and to @@ -4299,7 +4372,7 @@ bool SROA::deleteDeadInstructions( bool Changed = false; while (!DeadInsts.empty()) { Instruction *I = DeadInsts.pop_back_val(); - DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); + LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n"); // If the instruction is an alloca, find the possible dbg.declare connected // to it, and remove it too. We must do this before calling RAUW or we will @@ -4327,7 +4400,7 @@ bool SROA::deleteDeadInstructions( return Changed; } -/// \brief Promote the allocas, using the best available technique. +/// Promote the allocas, using the best available technique. /// /// This attempts to promote whatever allocas have been identified as viable in /// the PromotableAllocas list. If that list is empty, there is nothing to do. 
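The `SmallBitVector` logic above makes splitting of non-whole-alloca slices unconditional (previously gated by the `-sroa-split-nonwhole-alloca-slices` flag deleted at the top of the patch) for allocas up to `MaxBitVectorSize` bytes: an offset is a legal split point only if no load or store straddles it. A plain-`std::vector<bool>` sketch of the marking step, with a hypothetical `Range` type:

```cpp
#include <cstdint>
#include <vector>

struct Range { uint64_t Begin, End; }; // [Begin, End) bytes of the alloca

// Offsets strictly inside any access are not valid split points; only
// boundaries that every access starts or ends at remain set.
std::vector<bool> computeSplittable(uint64_t AllocaSize,
                                    const std::vector<Range> &Accesses) {
  std::vector<bool> Splittable(AllocaSize + 1, true);
  for (const Range &R : Accesses)
    for (uint64_t O = R.Begin + 1; O < R.End && O < AllocaSize; ++O)
      Splittable[O] = false; // an access straddles offset O
  return Splittable;
}
```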
@@ -4338,7 +4411,7 @@ bool SROA::promoteAllocas(Function &F) { NumPromoted += PromotableAllocas.size(); - DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); + LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); PromoteMemToReg(PromotableAllocas, *DT, AC); PromotableAllocas.clear(); return true; @@ -4346,7 +4419,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, AssumptionCache &RunAC) { - DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); + LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); C = &F.getContext(); DT = &RunDT; AC = &RunAC;
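For context on how `runImpl` above gets invoked: SROA is a new-pass-manager pass, and a driver hands it each function along with an analysis manager able to produce the `DominatorTree` and `AssumptionCache` it requests. A minimal sketch of running it standalone (a real driver would also register module, CGSCC, and loop analysis managers and their cross-proxies):

```cpp
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/SROA.h"
using namespace llvm;

// Run SROA over every definition in M. PassBuilder registers the stock
// function analyses (dominators, assumption cache, ...) that SROA::run
// pulls out of the FunctionAnalysisManager.
static void runSROAOnModule(Module &M) {
  PassBuilder PB;
  FunctionAnalysisManager FAM;
  PB.registerFunctionAnalyses(FAM);

  FunctionPassManager FPM;
  FPM.addPass(SROA());
  for (Function &F : M)
    if (!F.isDeclaration())
      FPM.run(F, FAM);
}
```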