Diffstat (limited to 'llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp')
-rw-r--r--  llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp  1057
1 file changed, 571 insertions(+), 486 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index d22b3f409585..a8ec8bb97970 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -13,10 +13,10 @@
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
-// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking
+// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
// upwards.
-// 2. Check that there are no reads between EarlierAccess and the StartDef by
-// checking all uses starting at EarlierAccess and walking until we see
+// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
+// checking all uses starting at MaybeDeadAccess and walking until we see
// StartDef.
// 3. For each found CurrentDef, check that:
// 1. There are no barrier instructions between CurrentDef and StartDef (like
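// Editorial illustration (not part of this patch): the smallest case the walk
// described above eliminates. Hypothetical C:
//   void f(int *p) {
//     *p = 1;   // MaybeDeadAccess: no reads before the overwrite below
//     *p = 2;   // StartDef/KillingDef: completely overwrites the store above
//   }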
@@ -56,6 +56,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -78,6 +79,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
@@ -122,7 +124,7 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging",
static cl::opt<unsigned>
MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
cl::desc("The number of memory instructions to scan for "
- "dead store elimination (default = 100)"));
+ "dead store elimination (default = 150)"));
static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
"dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
cl::desc("The maximum number of steps while walking upwards to find "
@@ -203,39 +205,6 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
return false;
}
-/// Return a Location stored to by the specified instruction. If isRemovable
-/// returns true, this function and getLocForRead completely describe the memory
-/// operations for this instruction.
-static MemoryLocation getLocForWrite(Instruction *Inst,
- const TargetLibraryInfo &TLI) {
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return MemoryLocation::get(SI);
-
- // memcpy/memmove/memset.
- if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst))
- return MemoryLocation::getForDest(MI);
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- default:
- return MemoryLocation(); // Unhandled intrinsic.
- case Intrinsic::init_trampoline:
- return MemoryLocation::getAfter(II->getArgOperand(0));
- case Intrinsic::masked_store:
- return MemoryLocation::getForArgument(II, 1, TLI);
- case Intrinsic::lifetime_end: {
- uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- return MemoryLocation(II->getArgOperand(1), Len);
- }
- }
- }
- if (auto *CB = dyn_cast<CallBase>(Inst))
- // All the supported TLI functions so far happen to have dest as their
- // first argument.
- return MemoryLocation::getAfter(CB->getArgOperand(0));
- return MemoryLocation();
-}
-
/// If the value of this instruction and the memory it writes to is unused, may
/// we delete this instruction?
static bool isRemovable(Instruction *I) {
@@ -333,147 +302,146 @@ enum OverwriteResult {
} // end anonymous namespace
/// Check if two instructions are masked stores that completely
-/// overwrite one another. More specifically, \p Later has to
-/// overwrite \p Earlier.
-static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
- const Instruction *Earlier,
+/// overwrite one another. More specifically, \p KillingI has to
+/// overwrite \p DeadI.
+static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
+ const Instruction *DeadI,
BatchAAResults &AA) {
- const auto *IIL = dyn_cast<IntrinsicInst>(Later);
- const auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
- if (IIL == nullptr || IIE == nullptr)
+ const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
+ const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
+ if (KillingII == nullptr || DeadII == nullptr)
return OW_Unknown;
- if (IIL->getIntrinsicID() != Intrinsic::masked_store ||
- IIE->getIntrinsicID() != Intrinsic::masked_store)
+ if (KillingII->getIntrinsicID() != Intrinsic::masked_store ||
+ DeadII->getIntrinsicID() != Intrinsic::masked_store)
return OW_Unknown;
// Pointers.
- Value *LP = IIL->getArgOperand(1)->stripPointerCasts();
- Value *EP = IIE->getArgOperand(1)->stripPointerCasts();
- if (LP != EP && !AA.isMustAlias(LP, EP))
+ Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
+ Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
+ if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
return OW_Unknown;
// Masks.
- // TODO: check that Later's mask is a superset of the Earlier's mask.
- if (IIL->getArgOperand(3) != IIE->getArgOperand(3))
+ // TODO: check that KillingII's mask is a superset of DeadII's mask.
+ if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
return OW_Unknown;
return OW_Complete;
}
-/// Return 'OW_Complete' if a store to the 'Later' location completely
-/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
-/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
-/// beginning of the 'Earlier' location is overwritten by 'Later'.
-/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
-/// overwritten by a latter (smaller) store which doesn't write outside the big
+/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
+/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
+/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
+/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
+/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
+/// overwritten by a killing (smaller) store which doesn't write outside the big
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
-/// NOTE: This function must only be called if both \p Later and \p Earlier
-/// write to the same underlying object with valid \p EarlierOff and \p
-/// LaterOff.
-static OverwriteResult isPartialOverwrite(const MemoryLocation &Later,
- const MemoryLocation &Earlier,
- int64_t EarlierOff, int64_t LaterOff,
- Instruction *DepWrite,
+/// NOTE: This function must only be called if both \p KillingLoc and \p
+/// DeadLoc belong to the same underlying object with valid \p KillingOff and
+/// \p DeadOff.
+static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
+ const MemoryLocation &DeadLoc,
+ int64_t KillingOff, int64_t DeadOff,
+ Instruction *DeadI,
InstOverlapIntervalsTy &IOL) {
- const uint64_t LaterSize = Later.Size.getValue();
- const uint64_t EarlierSize = Earlier.Size.getValue();
+ const uint64_t KillingSize = KillingLoc.Size.getValue();
+ const uint64_t DeadSize = DeadLoc.Size.getValue();
// We may now overlap, although the overlap is not complete. There might also
// be other incomplete overlaps, and together, they might cover the complete
- // earlier write.
+ // dead store.
// Note: The correctness of this logic depends on the fact that this function
// is not even called with DeadI when there are any intervening reads.
if (EnablePartialOverwriteTracking &&
- LaterOff < int64_t(EarlierOff + EarlierSize) &&
- int64_t(LaterOff + LaterSize) >= EarlierOff) {
+ KillingOff < int64_t(DeadOff + DeadSize) &&
+ int64_t(KillingOff + KillingSize) >= DeadOff) {
// Insert our part of the overlap into the map.
- auto &IM = IOL[DepWrite];
- LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff
- << ", " << int64_t(EarlierOff + EarlierSize)
- << ") Later [" << LaterOff << ", "
- << int64_t(LaterOff + LaterSize) << ")\n");
+ auto &IM = IOL[DeadI];
+ LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
+ << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
+ << KillingOff << ", " << int64_t(KillingOff + KillingSize)
+ << ")\n");
// Make sure that we only insert non-overlapping intervals and combine
// adjacent intervals. The intervals are stored in the map with the ending
// offset as the key (in the half-open sense) and the starting offset as
// the value.
- int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize;
+ int64_t KillingIntStart = KillingOff;
+ int64_t KillingIntEnd = KillingOff + KillingSize;
- // Find any intervals ending at, or after, LaterIntStart which start
- // before LaterIntEnd.
- auto ILI = IM.lower_bound(LaterIntStart);
- if (ILI != IM.end() && ILI->second <= LaterIntEnd) {
+ // Find any intervals ending at, or after, KillingIntStart which start
+ // before KillingIntEnd.
+ auto ILI = IM.lower_bound(KillingIntStart);
+ if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
// This existing interval is overlapped with the current store somewhere
- // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing
+ // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
// intervals and adjusting our start and end.
- LaterIntStart = std::min(LaterIntStart, ILI->second);
- LaterIntEnd = std::max(LaterIntEnd, ILI->first);
+ KillingIntStart = std::min(KillingIntStart, ILI->second);
+ KillingIntEnd = std::max(KillingIntEnd, ILI->first);
ILI = IM.erase(ILI);
// Continue erasing and adjusting our end in case other previous
// intervals are also overlapped with the current store.
//
- // |--- ealier 1 ---| |--- ealier 2 ---|
- // |------- later---------|
+ // |--- dead 1 ---| |--- dead 2 ---|
+ // |------- killing---------|
//
- while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
- assert(ILI->second > LaterIntStart && "Unexpected interval");
- LaterIntEnd = std::max(LaterIntEnd, ILI->first);
+ while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
+ assert(ILI->second > KillingIntStart && "Unexpected interval");
+ KillingIntEnd = std::max(KillingIntEnd, ILI->first);
ILI = IM.erase(ILI);
}
}
- IM[LaterIntEnd] = LaterIntStart;
+ IM[KillingIntEnd] = KillingIntStart;
ILI = IM.begin();
- if (ILI->second <= EarlierOff &&
- ILI->first >= int64_t(EarlierOff + EarlierSize)) {
- LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
- << EarlierOff << ", "
- << int64_t(EarlierOff + EarlierSize)
- << ") Composite Later [" << ILI->second << ", "
+ if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
+ LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
+ << DeadOff << ", " << int64_t(DeadOff + DeadSize)
+ << ") Composite KillingLoc [" << ILI->second << ", "
<< ILI->first << ")\n");
++NumCompletePartials;
return OW_Complete;
}
}
- // Check for an earlier store which writes to all the memory locations that
- // the later store writes to.
- if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
- int64_t(EarlierOff + EarlierSize) > LaterOff &&
- uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
- LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load ["
- << EarlierOff << ", "
- << int64_t(EarlierOff + EarlierSize)
- << ") by a later store [" << LaterOff << ", "
- << int64_t(LaterOff + LaterSize) << ")\n");
+ // Check for a dead store which writes to all the memory locations that
+ // the killing store writes to.
+ if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
+ int64_t(DeadOff + DeadSize) > KillingOff &&
+ uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
+ LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
+ << ", " << int64_t(DeadOff + DeadSize)
+ << ") by a killing store [" << KillingOff << ", "
+ << int64_t(KillingOff + KillingSize) << ")\n");
// TODO: Maybe come up with a better name?
return OW_PartialEarlierWithFullLater;
}
- // Another interesting case is if the later store overwrites the end of the
- // earlier store.
+ // Another interesting case is if the killing store overwrites the end of the
+ // dead store.
//
- // |--earlier--|
- // |-- later --|
+ // |--dead--|
+ // |-- killing --|
//
- // In this case we may want to trim the size of earlier to avoid generating
- // writes to addresses which will definitely be overwritten later
+ // In this case we may want to trim the size of the dead store to avoid
+ // generating stores to addresses which will definitely be overwritten by
+ // the killing store.
if (!EnablePartialOverwriteTracking &&
- (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
- int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
+ (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
+ int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
return OW_End;
- // Finally, we also need to check if the later store overwrites the beginning
- // of the earlier store.
+ // Finally, we also need to check if the killing store overwrites the
+ // beginning of the dead store.
//
- // |--earlier--|
- // |-- later --|
+ // |--dead--|
+ // |-- killing --|
//
// In this case we may want to move the destination address and trim the size
- // of earlier to avoid generating writes to addresses which will definitely
- // be overwritten later.
+ // of the dead store to avoid generating stores to addresses which will
+ // definitely be overwritten by the killing store.
if (!EnablePartialOverwriteTracking &&
- (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
- assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
+ (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
+ assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
"Expect to be handled as OW_Complete");
return OW_Begin;
}
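// Editorial sketch (not part of this patch): a standalone model of the merge
// logic above. The map is keyed by interval end, maps end -> start, and all
// intervals are half-open. Hypothetical helper; needs <algorithm>, <cstdint>
// and <map>.
static void mergeKillingInterval(std::map<int64_t, int64_t> &IM,
                                 int64_t Start, int64_t End) {
  auto It = IM.lower_bound(Start); // first interval ending at/after Start
  while (It != IM.end() && It->second <= End) {
    Start = std::min(Start, It->second); // absorb the overlapping interval
    End = std::max(End, It->first);
    It = IM.erase(It);
  }
  IM[End] = Start; // re-insert the coalesced interval
}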
@@ -505,7 +473,12 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
BasicBlock::iterator SecondBBI(SecondI);
BasicBlock *FirstBB = FirstI->getParent();
BasicBlock *SecondBB = SecondI->getParent();
- MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+ MemoryLocation MemLoc;
+ if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
+ MemLoc = MemoryLocation::getForDest(MemSet);
+ else
+ MemLoc = MemoryLocation::get(SecondI);
+
auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
// Start checking the SecondBB.
@@ -568,11 +541,11 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
return true;
}
-static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
- uint64_t &EarlierSize, int64_t LaterStart,
- uint64_t LaterSize, bool IsOverwriteEnd) {
- auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite);
- Align PrefAlign = EarlierIntrinsic->getDestAlign().valueOrOne();
+static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
+ uint64_t &DeadSize, int64_t KillingStart,
+ uint64_t KillingSize, bool IsOverwriteEnd) {
+ auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
+ Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
// We assume that memset/memcpy operates in chunks of the "largest" native
// type size and aligned on the same value. That means optimal start and size
@@ -593,19 +566,19 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
// Compute start and size of the region to remove. Make sure 'PrefAlign' is
// maintained on the remaining store.
if (IsOverwriteEnd) {
- // Calculate required adjustment for 'LaterStart'in order to keep remaining
- // store size aligned on 'PerfAlign'.
+ // Calculate required adjustment for 'KillingStart' in order to keep
+ // remaining store size aligned on 'PrefAlign'.
uint64_t Off =
- offsetToAlignment(uint64_t(LaterStart - EarlierStart), PrefAlign);
- ToRemoveStart = LaterStart + Off;
- if (EarlierSize <= uint64_t(ToRemoveStart - EarlierStart))
+ offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
+ ToRemoveStart = KillingStart + Off;
+ if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
return false;
- ToRemoveSize = EarlierSize - uint64_t(ToRemoveStart - EarlierStart);
+ ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
} else {
- ToRemoveStart = EarlierStart;
- assert(LaterSize >= uint64_t(EarlierStart - LaterStart) &&
+ ToRemoveStart = DeadStart;
+ assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
"Not overlapping accesses?");
- ToRemoveSize = LaterSize - uint64_t(EarlierStart - LaterStart);
+ ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
// Calculate required adjustment for 'ToRemoveSize' in order to keep
// start of the remaining store aligned on 'PrefAlign'.
uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
@@ -619,10 +592,10 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
}
assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
- assert(EarlierSize > ToRemoveSize && "Can't remove more than original size");
+ assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
- uint64_t NewSize = EarlierSize - ToRemoveSize;
- if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) {
+ uint64_t NewSize = DeadSize - ToRemoveSize;
+ if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
// When shortening an atomic memory intrinsic, the newly shortened
// length must remain an integer multiple of the element size.
const uint32_t ElementSize = AMI->getElementSizeInBytes();
@@ -631,65 +604,62 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
}
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
- << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
- << *EarlierWrite << "\n KILLER [" << ToRemoveStart << ", "
+ << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
+ << "\n KILLER [" << ToRemoveStart << ", "
<< int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
- Value *EarlierWriteLength = EarlierIntrinsic->getLength();
- Value *TrimmedLength =
- ConstantInt::get(EarlierWriteLength->getType(), NewSize);
- EarlierIntrinsic->setLength(TrimmedLength);
- EarlierIntrinsic->setDestAlignment(PrefAlign);
+ Value *DeadWriteLength = DeadIntrinsic->getLength();
+ Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
+ DeadIntrinsic->setLength(TrimmedLength);
+ DeadIntrinsic->setDestAlignment(PrefAlign);
if (!IsOverwriteEnd) {
- Value *OrigDest = EarlierIntrinsic->getRawDest();
+ Value *OrigDest = DeadIntrinsic->getRawDest();
Type *Int8PtrTy =
- Type::getInt8PtrTy(EarlierIntrinsic->getContext(),
+ Type::getInt8PtrTy(DeadIntrinsic->getContext(),
OrigDest->getType()->getPointerAddressSpace());
Value *Dest = OrigDest;
if (OrigDest->getType() != Int8PtrTy)
- Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", EarlierWrite);
+ Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
Value *Indices[1] = {
- ConstantInt::get(EarlierWriteLength->getType(), ToRemoveSize)};
+ ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
- Type::getInt8Ty(EarlierIntrinsic->getContext()),
- Dest, Indices, "", EarlierWrite);
- NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc());
+ Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
+ NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
if (NewDestGEP->getType() != OrigDest->getType())
NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
- "", EarlierWrite);
- EarlierIntrinsic->setDest(NewDestGEP);
+ "", DeadI);
+ DeadIntrinsic->setDest(NewDestGEP);
}
- // Finally update start and size of earlier access.
+ // Finally update start and size of dead access.
if (!IsOverwriteEnd)
- EarlierStart += ToRemoveSize;
- EarlierSize = NewSize;
+ DeadStart += ToRemoveSize;
+ DeadSize = NewSize;
return true;
}
-static bool tryToShortenEnd(Instruction *EarlierWrite,
- OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, uint64_t &EarlierSize) {
- if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
+static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
+ int64_t &DeadStart, uint64_t &DeadSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
return false;
OverlapIntervalsTy::iterator OII = --IntervalMap.end();
- int64_t LaterStart = OII->second;
- uint64_t LaterSize = OII->first - LaterStart;
+ int64_t KillingStart = OII->second;
+ uint64_t KillingSize = OII->first - KillingStart;
- assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+ assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
- if (LaterStart > EarlierStart &&
- // Note: "LaterStart - EarlierStart" is known to be positive due to
+ if (KillingStart > DeadStart &&
+ // Note: "KillingStart - KillingStart" is known to be positive due to
// preceding check.
- (uint64_t)(LaterStart - EarlierStart) < EarlierSize &&
- // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to
+ (uint64_t)(KillingStart - DeadStart) < DeadSize &&
+ // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
// be non negative due to preceding checks.
- LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) {
- if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
- LaterSize, true)) {
+ KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
+ if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
+ true)) {
IntervalMap.erase(OII);
return true;
}
@@ -697,28 +667,28 @@ static bool tryToShortenEnd(Instruction *EarlierWrite,
return false;
}
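// Editorial illustration (not part of this patch): the effect of the
// shortening helpers at the C level, with hypothetical sizes (OW_End case):
//   memset(p, 0, 32);        // dead tail [16, 32) is rewritten below
//   memset(p + 16, 1, 16);   // killing store
// becomes
//   memset(p, 0, 16);        // trimmed, keeping the dest PrefAlign-aligned
//   memset(p + 16, 1, 16);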
-static bool tryToShortenBegin(Instruction *EarlierWrite,
+static bool tryToShortenBegin(Instruction *DeadI,
OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, uint64_t &EarlierSize) {
- if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
+ int64_t &DeadStart, uint64_t &DeadSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
return false;
OverlapIntervalsTy::iterator OII = IntervalMap.begin();
- int64_t LaterStart = OII->second;
- uint64_t LaterSize = OII->first - LaterStart;
+ int64_t KillingStart = OII->second;
+ uint64_t KillingSize = OII->first - KillingStart;
- assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+ assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
- if (LaterStart <= EarlierStart &&
- // Note: "EarlierStart - LaterStart" is known to be non negative due to
+ if (KillingStart <= DeadStart &&
+ // Note: "DeadStart - KillingStart" is known to be non negative due to
// preceding check.
- LaterSize > (uint64_t)(EarlierStart - LaterStart)) {
- // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be
- // positive due to preceding checks.
- assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize &&
+ KillingSize > (uint64_t)(DeadStart - KillingStart)) {
+ // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
+ // be positive due to preceding checks.
+ assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
"Should have been handled as OW_Complete");
- if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
- LaterSize, false)) {
+ if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
+ false)) {
IntervalMap.erase(OII);
return true;
}
@@ -726,71 +696,48 @@ static bool tryToShortenBegin(Instruction *EarlierWrite,
return false;
}
-static bool removePartiallyOverlappedStores(const DataLayout &DL,
- InstOverlapIntervalsTy &IOL,
- const TargetLibraryInfo &TLI) {
- bool Changed = false;
- for (auto OI : IOL) {
- Instruction *EarlierWrite = OI.first;
- MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI);
- assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
-
- const Value *Ptr = Loc.Ptr->stripPointerCasts();
- int64_t EarlierStart = 0;
- uint64_t EarlierSize = Loc.Size.getValue();
- GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
- OverlapIntervalsTy &IntervalMap = OI.second;
- Changed |=
- tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
- if (IntervalMap.empty())
- continue;
- Changed |=
- tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
- }
- return Changed;
-}
-
-static Constant *tryToMergePartialOverlappingStores(
- StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset,
- int64_t DepWriteOffset, const DataLayout &DL, BatchAAResults &AA,
- DominatorTree *DT) {
+static Constant *
+tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
+ int64_t KillingOffset, int64_t DeadOffset,
+ const DataLayout &DL, BatchAAResults &AA,
+ DominatorTree *DT) {
- if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
- DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) &&
- Later && isa<ConstantInt>(Later->getValueOperand()) &&
- DL.typeSizeEqualsStoreSize(Later->getValueOperand()->getType()) &&
- memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) {
+ if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
+ KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
+ memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
// If the store we find is:
// a) partially overwritten by the store to 'Loc'
- // b) the later store is fully contained in the earlier one and
+ // b) the killing store is fully contained in the dead one and
// c) they both have a constant value
// d) none of the two stores need padding
- // Merge the two stores, replacing the earlier store's value with a
+ // Merge the two stores, replacing the dead store's value with a
// merge of both values.
// TODO: Deal with other constant types (vectors, etc), and probably
// some mem intrinsics (if needed)
- APInt EarlierValue =
- cast<ConstantInt>(Earlier->getValueOperand())->getValue();
- APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue();
- unsigned LaterBits = LaterValue.getBitWidth();
- assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
- LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
+ APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
+ APInt KillingValue =
+ cast<ConstantInt>(KillingI->getValueOperand())->getValue();
+ unsigned KillingBits = KillingValue.getBitWidth();
+ assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
+ KillingValue = KillingValue.zext(DeadValue.getBitWidth());
// Offset of the smaller store inside the larger store
- unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
- unsigned LShiftAmount = DL.isBigEndian() ? EarlierValue.getBitWidth() -
- BitOffsetDiff - LaterBits
- : BitOffsetDiff;
- APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
- LShiftAmount + LaterBits);
+ unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
+ unsigned LShiftAmount =
+ DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
+ : BitOffsetDiff;
+ APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
+ LShiftAmount + KillingBits);
// Clear the bits we'll be replacing, then OR with the smaller
// store, shifted appropriately.
- APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
- LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *Earlier
- << "\n Later: " << *Later
+ APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
+ LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
+ << "\n Killing: " << *KillingI
<< "\n Merged Value: " << Merged << '\n');
- return ConstantInt::get(Earlier->getValueOperand()->getType(), Merged);
+ return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
}
return nullptr;
}
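// Editorial illustration (not part of this patch): the little-endian case of
// the merge above with concrete numbers -- a 4-byte dead constant store
// patched by a 1-byte killing store two bytes into it. Values hypothetical.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t DeadValue = 0xAABBCCDD;  // earlier, wider constant store
  uint32_t KillingValue = 0x11;     // later one-byte constant store
  unsigned LShiftAmount = 2 * 8;    // BitOffsetDiff for byte offset 2
  uint32_t Mask = 0xFFu << LShiftAmount;          // bits being replaced
  uint32_t Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
  assert(Merged == 0xAA11CCDD);     // byte 2 replaced in place
}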
@@ -819,14 +766,17 @@ bool isNoopIntrinsic(Instruction *I) {
}
// Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller,
+ const TargetLibraryInfo &TLI) {
Instruction *DI = D->getMemoryInst();
// Calls that only access inaccessible memory cannot read or write any memory
// locations we consider for elimination.
if (auto *CB = dyn_cast<CallBase>(DI))
- if (CB->onlyAccessesInaccessibleMemory())
+ if (CB->onlyAccessesInaccessibleMemory()) {
+ if (isAllocLikeFn(DI, &TLI))
+ return false;
return true;
-
+ }
// We can eliminate stores to locations not visible to the caller across
// throwing instructions.
if (DI->mayThrow() && !DefVisibleToCaller)
@@ -841,7 +791,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
return true;
// Skip intrinsics that do not really read or modify memory.
- if (isNoopIntrinsic(D->getMemoryInst()))
+ if (isNoopIntrinsic(DI))
return true;
return false;
@@ -850,6 +800,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
struct DSEState {
Function &F;
AliasAnalysis &AA;
+ EarliestEscapeInfo EI;
/// The single BatchAA instance that is used to cache AA queries. It will
/// not be invalidated over the whole run. This is safe, because:
@@ -892,30 +843,29 @@ struct DSEState {
/// basic block.
DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
+ // Class contains a self-reference; make sure it's not copied/moved.
+ DSEState(const DSEState &) = delete;
+ DSEState &operator=(const DSEState &) = delete;
+
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
const LoopInfo &LI)
- : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
- DL(F.getParent()->getDataLayout()), LI(LI) {}
-
- static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
- DominatorTree &DT, PostDominatorTree &PDT,
- const TargetLibraryInfo &TLI, const LoopInfo &LI) {
- DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+ : F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
+ PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
for (BasicBlock *BB : post_order(&F)) {
- State.PostOrderNumbers[BB] = PO++;
+ PostOrderNumbers[BB] = PO++;
for (Instruction &I : *BB) {
MemoryAccess *MA = MSSA.getMemoryAccess(&I);
if (I.mayThrow() && !MA)
- State.ThrowingBlocks.insert(I.getParent());
+ ThrowingBlocks.insert(I.getParent());
auto *MD = dyn_cast_or_null<MemoryDef>(MA);
- if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
- (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I)))
- State.MemDefs.push_back(MD);
+ if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
+ (getLocForWriteEx(&I) || isMemTerminatorInst(&I)))
+ MemDefs.push_back(MD);
}
}
@@ -925,131 +875,134 @@ struct DSEState {
if (AI.hasPassPointeeByValueCopyAttr()) {
// For byval, the caller doesn't know the address of the allocation.
if (AI.hasByValAttr())
- State.InvisibleToCallerBeforeRet.insert({&AI, true});
- State.InvisibleToCallerAfterRet.insert({&AI, true});
+ InvisibleToCallerBeforeRet.insert({&AI, true});
+ InvisibleToCallerAfterRet.insert({&AI, true});
}
// Collect whether there is any irreducible control flow in the function.
- State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
-
- return State;
+ ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
}
- /// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
- /// instruction) completely overwrites a store to the 'Earlier' location.
- /// (by \p EarlierI instruction).
- /// Return OW_MaybePartial if \p Later does not completely overwrite
- /// \p Earlier, but they both write to the same underlying object. In that
- /// case, use isPartialOverwrite to check if \p Later partially overwrites
- /// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
- OverwriteResult
- isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
- const MemoryLocation &Later, const MemoryLocation &Earlier,
- int64_t &EarlierOff, int64_t &LaterOff) {
+ /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
+ /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
+ /// location (by \p DeadI instruction).
+ /// Return OW_MaybePartial if \p KillingI does not completely overwrite
+ /// \p DeadI, but they both write to the same underlying object. In that
+ /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
+ /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
+ OverwriteResult isOverwrite(const Instruction *KillingI,
+ const Instruction *DeadI,
+ const MemoryLocation &KillingLoc,
+ const MemoryLocation &DeadLoc,
+ int64_t &KillingOff, int64_t &DeadOff) {
// AliasAnalysis does not always account for loops. Limit overwrite checks
- // to dependencies for which we can guarantee they are independant of any
+ // to dependencies for which we can guarantee they are independent of any
// loops they are in.
- if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier))
+ if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
return OW_Unknown;
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
- if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
+ if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) {
// In case no constant size is known, try to use the IR values for the number
// of bytes written and check if they match.
- const auto *LaterMemI = dyn_cast<MemIntrinsic>(LaterI);
- const auto *EarlierMemI = dyn_cast<MemIntrinsic>(EarlierI);
- if (LaterMemI && EarlierMemI) {
- const Value *LaterV = LaterMemI->getLength();
- const Value *EarlierV = EarlierMemI->getLength();
- if (LaterV == EarlierV && BatchAA.isMustAlias(Earlier, Later))
+ const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
+ const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
+ if (KillingMemI && DeadMemI) {
+ const Value *KillingV = KillingMemI->getLength();
+ const Value *DeadV = DeadMemI->getLength();
+ if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
return OW_Complete;
}
// Masked stores have imprecise locations, but we can reason about them
// to some extent.
- return isMaskedStoreOverwrite(LaterI, EarlierI, BatchAA);
+ return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
}
- const uint64_t LaterSize = Later.Size.getValue();
- const uint64_t EarlierSize = Earlier.Size.getValue();
+ const uint64_t KillingSize = KillingLoc.Size.getValue();
+ const uint64_t DeadSize = DeadLoc.Size.getValue();
// Query the alias information
- AliasResult AAR = BatchAA.alias(Later, Earlier);
+ AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
// If the start pointers are the same, we just have to compare sizes to see if
- // the later store was larger than the earlier store.
+ // the killing store was larger than the dead store.
if (AAR == AliasResult::MustAlias) {
- // Make sure that the Later size is >= the Earlier size.
- if (LaterSize >= EarlierSize)
+ // Make sure that the KillingSize is >= the DeadSize.
+ if (KillingSize >= DeadSize)
return OW_Complete;
}
// If we hit a partial alias we may have a full overwrite
if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
int32_t Off = AAR.getOffset();
- if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
+ if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
return OW_Complete;
}
- // Check to see if the later store is to the entire object (either a global,
- // an alloca, or a byval/inalloca argument). If so, then it clearly
+ // Check to see if the killing store is to the entire object (either a
+ // global, an alloca, or a byval/inalloca argument). If so, then it clearly
// overwrites any other store to the same object.
- const Value *P1 = Earlier.Ptr->stripPointerCasts();
- const Value *P2 = Later.Ptr->stripPointerCasts();
- const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
+ const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
+ const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
+ const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
+ const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
- if (UO1 != UO2)
+ if (DeadUndObj != KillingUndObj)
return OW_Unknown;
- // If the "Later" store is to a recognizable object, get its size.
- uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, &F);
- if (ObjectSize != MemoryLocation::UnknownSize)
- if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
+ // If the KillingI store is to a recognizable object, get its size.
+ uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
+ if (KillingUndObjSize != MemoryLocation::UnknownSize)
+ if (KillingUndObjSize == KillingSize && KillingUndObjSize >= DeadSize)
return OW_Complete;
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
- EarlierOff = 0;
- LaterOff = 0;
- const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
- const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
+ DeadOff = 0;
+ KillingOff = 0;
+ const Value *DeadBasePtr =
+ GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
+ const Value *KillingBasePtr =
+ GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
- // If the base pointers still differ, we have two completely different stores.
- if (BP1 != BP2)
+ // If the base pointers still differ, we have two completely different
+ // stores.
+ if (DeadBasePtr != KillingBasePtr)
return OW_Unknown;
- // The later access completely overlaps the earlier store if and only if
- // both start and end of the earlier one is "inside" the later one:
- // |<->|--earlier--|<->|
- // |-------later-------|
+ // The killing access completely overlaps the dead store if and only if
+ // both start and end of the dead one is "inside" the killing one:
+ // |<->|--dead--|<->|
+ // |-----killing------|
// Accesses may overlap if and only if start of one of them is "inside"
// another one:
- // |<->|--earlier--|<----->|
- // |-------later-------|
+ // |<->|--dead--|<-------->|
+ // |-------killing--------|
// OR
- // |----- earlier -----|
- // |<->|---later---|<----->|
+ // |-------dead-------|
+ // |<->|---killing---|<----->|
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
- // Check if the earlier access starts "not before" the later one.
- if (EarlierOff >= LaterOff) {
- // If the earlier access ends "not after" the later access then the earlier
- // one is completely overwritten by the later one.
- if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
+ // Check if the dead access starts "not before" the killing one.
+ if (DeadOff >= KillingOff) {
+ // If the dead access ends "not after" the killing access then the
+ // dead one is completely overwritten by the killing one.
+ if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
return OW_Complete;
- // If start of the earlier access is "before" end of the later access then
- // accesses overlap.
- else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
+ // If start of the dead access is "before" end of the killing access
+ // then accesses overlap.
+ else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
return OW_MaybePartial;
}
- // If start of the later access is "before" end of the earlier access then
+ // If start of the killing access is "before" end of the dead access then
// accesses overlap.
- else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
+ else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
return OW_MaybePartial;
}
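// Editorial illustration (not part of this patch): the offset checks above on
// hypothetical values, with DeadOff = 4, DeadSize = 8, KillingOff = 0:
//   KillingSize >= 12 -> OW_Complete     (dead [4,12) lies inside killing [0,12))
//   KillingSize == 6  -> OW_MaybePartial (killing [0,6) overlaps dead [4,12))
//   KillingSize == 4  -> no branch fires and the accesses do not overlap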
@@ -1106,8 +1059,13 @@ struct DSEState {
LibFunc LF;
if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
switch (LF) {
- case LibFunc_strcpy:
case LibFunc_strncpy:
+ if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
+ return MemoryLocation(CB->getArgOperand(0),
+ LocationSize::precise(Len->getZExtValue()),
+ CB->getAAMetadata());
+ LLVM_FALLTHROUGH;
+ case LibFunc_strcpy:
case LibFunc_strcat:
case LibFunc_strncat:
return {MemoryLocation::getAfter(CB->getArgOperand(0))};
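// Editorial note (not part of this patch): strncpy always writes exactly its
// length argument (padding with nulls), so a constant length yields a precise
// location. Hypothetical C illustration:
//   strncpy(dst, src, 16);   // precise MemoryLocation(dst, 16)
//   strcpy(dst, src);        // unknown size: getAfter(dst)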
@@ -1145,8 +1103,8 @@ struct DSEState {
int64_t InstWriteOffset, DepWriteOffset;
if (auto CC = getLocForWriteEx(UseInst))
- return isOverwrite(UseInst, DefInst, *CC, DefLoc, DepWriteOffset,
- InstWriteOffset) == OW_Complete;
+ return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
+ DepWriteOffset) == OW_Complete;
return false;
}
@@ -1248,9 +1206,10 @@ struct DSEState {
const Value *LocUO = getUnderlyingObject(Loc.Ptr);
return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
}
- int64_t InstWriteOffset, DepWriteOffset;
- return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DepWriteOffset,
- InstWriteOffset) == OW_Complete;
+ int64_t InstWriteOffset = 0;
+ int64_t DepWriteOffset = 0;
+ return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
+ DepWriteOffset) == OW_Complete;
}
// Returns true if \p Use may read from \p DefLoc.
@@ -1270,10 +1229,6 @@ struct DSEState {
if (CB->onlyAccessesInaccessibleMemory())
return false;
- // NOTE: For calls, the number of stores removed could be slightly improved
- // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
- // be expensive compared to the benefits in practice. For now, avoid more
- // expensive analysis to limit compile-time.
return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
@@ -1329,15 +1284,15 @@ struct DSEState {
return IsGuaranteedLoopInvariantBase(Ptr);
}
- // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with
- // no read access between them or on any other path to a function exit block
- // if \p DefLoc is not accessible after the function returns. If there is no
- // such MemoryDef, return None. The returned value may not (completely)
- // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing
- // MemoryUse (read).
+ // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
+ // with no read access between them or on any other path to a function exit
+ // block if \p KillingLoc is not accessible after the function returns. If
+ // there is no such MemoryDef, return None. The returned value may not
+ // (completely) overwrite \p KillingLoc. Currently we bail out when we
+ // encounter an aliasing MemoryUse (read).
Optional<MemoryAccess *>
getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
- const MemoryLocation &DefLoc, const Value *DefUO,
+ const MemoryLocation &KillingLoc, const Value *KillingUndObj,
unsigned &ScanLimit, unsigned &WalkerStepLimit,
bool IsMemTerm, unsigned &PartialLimit) {
if (ScanLimit == 0 || WalkerStepLimit == 0) {
@@ -1389,19 +1344,20 @@ struct DSEState {
MemoryDef *CurrentDef = cast<MemoryDef>(Current);
Instruction *CurrentI = CurrentDef->getMemoryInst();
- if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO)))
+ if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj),
+ TLI))
continue;
// Before we try to remove anything, check for any extra throwing
// instructions that block us from DSEing
- if (mayThrowBetween(KillingI, CurrentI, DefUO)) {
+ if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
return None;
}
// Check for anything that looks like it will be a barrier to further
// removal
- if (isDSEBarrier(DefUO, CurrentI)) {
+ if (isDSEBarrier(KillingUndObj, CurrentI)) {
LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
return None;
}
@@ -1410,14 +1366,14 @@ struct DSEState {
// clobber, bail out, as the path is not profitable. We skip this check
// for intrinsic calls, because the code knows how to handle memcpy
// intrinsics.
- if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
+ if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
return None;
// Quick check if there are direct uses that are read-clobbers.
- if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
+ if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
return !MSSA.dominates(StartAccess, UseOrDef) &&
- isReadClobber(DefLoc, UseOrDef->getMemoryInst());
+ isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
return false;
})) {
LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
@@ -1450,9 +1406,10 @@ struct DSEState {
if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI))
continue;
} else {
- int64_t InstWriteOffset, DepWriteOffset;
- auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc,
- DepWriteOffset, InstWriteOffset);
+ int64_t KillingOffset = 0;
+ int64_t DeadOffset = 0;
+ auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
+ KillingOffset, DeadOffset);
// If Current does not write to the same object as KillingDef, check
// the next candidate.
if (OR == OW_Unknown)
@@ -1473,30 +1430,25 @@ struct DSEState {
};
// Accesses to objects accessible after the function returns can only be
- // eliminated if the access is killed along all paths to the exit. Collect
+ // eliminated if the access is dead along all paths to the exit. Collect
// the blocks with killing (=completely overwriting MemoryDefs) and check if
- // they cover all paths from EarlierAccess to any function exit.
+ // they cover all paths from MaybeDeadAccess to any function exit.
SmallPtrSet<Instruction *, 16> KillingDefs;
KillingDefs.insert(KillingDef->getMemoryInst());
- MemoryAccess *EarlierAccess = Current;
- Instruction *EarlierMemInst =
- cast<MemoryDef>(EarlierAccess)->getMemoryInst();
- LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " ("
- << *EarlierMemInst << ")\n");
+ MemoryAccess *MaybeDeadAccess = Current;
+ MemoryLocation MaybeDeadLoc = *CurrentLoc;
+ Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
+ << *MaybeDeadI << ")\n");
SmallSetVector<MemoryAccess *, 32> WorkList;
auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
for (Use &U : Acc->uses())
WorkList.insert(cast<MemoryAccess>(U.getUser()));
};
- PushMemUses(EarlierAccess);
+ PushMemUses(MaybeDeadAccess);
- // Optimistically collect all accesses for reads. If we do not find any
- // read clobbers, add them to the cache.
- SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
- if (!EarlierMemInst->mayReadFromMemory())
- KnownNoReads.insert(EarlierAccess);
- // Check if EarlierDef may be read.
+ // Check if MaybeDeadAccess may be read.
for (unsigned I = 0; I < WorkList.size(); I++) {
MemoryAccess *UseAccess = WorkList[I];
@@ -1508,7 +1460,6 @@ struct DSEState {
}
--ScanLimit;
NumDomMemDefChecks++;
- KnownNoReads.insert(UseAccess);
if (isa<MemoryPhi>(UseAccess)) {
if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
@@ -1535,7 +1486,7 @@ struct DSEState {
// A memory terminator kills all preceding MemoryDefs and all succeeding
// MemoryAccesses. We do not have to check its users.
- if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
+ if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
LLVM_DEBUG(
dbgs()
<< " ... skipping, memterminator invalidates following accesses\n");
@@ -1548,14 +1499,14 @@ struct DSEState {
continue;
}
- if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
+ if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) {
LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
return None;
}
// Uses which may read the original MemoryDef mean we cannot eliminate the
// original MD. Stop walk.
- if (isReadClobber(*CurrentLoc, UseInst)) {
+ if (isReadClobber(MaybeDeadLoc, UseInst)) {
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
return None;
}
@@ -1563,16 +1514,16 @@ struct DSEState {
// If this worklist walks back to the original memory access (and the
// pointer is not guaranteed loop invariant) then we cannot assume that a
// store kills itself.
- if (EarlierAccess == UseAccess &&
- !isGuaranteedLoopInvariant(CurrentLoc->Ptr)) {
+ if (MaybeDeadAccess == UseAccess &&
+ !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
return None;
}
- // Otherwise, for the KillingDef and EarlierAccess we only have to check
+ // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
// if it reads the memory location.
// TODO: It would probably be better to check for self-reads before
// calling the function.
- if (KillingDef == UseAccess || EarlierAccess == UseAccess) {
+ if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
continue;
}
@@ -1581,18 +1532,18 @@ struct DSEState {
// the original location. Otherwise we have to check uses of *all*
// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
// miss cases like the following
- // 1 = Def(LoE) ; <----- EarlierDef stores [0,1]
+ // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
// Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
// (The Use points to the *first* Def it may alias)
// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
// stores [0,1]
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
- if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
+ if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
BasicBlock *MaybeKillingBlock = UseInst->getParent();
if (PostOrderNumbers.find(MaybeKillingBlock)->second <
- PostOrderNumbers.find(EarlierAccess->getBlock())->second) {
- if (!isInvisibleToCallerAfterRet(DefUO)) {
+ PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
+ if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
LLVM_DEBUG(dbgs()
<< " ... found killing def " << *UseInst << "\n");
KillingDefs.insert(UseInst);
@@ -1608,9 +1559,9 @@ struct DSEState {
}
// For accesses to locations visible after the function returns, make sure
- // that the location is killed (=overwritten) along all paths from
- // EarlierAccess to the exit.
- if (!isInvisibleToCallerAfterRet(DefUO)) {
+ // that the location is dead (=overwritten) along all paths from
+ // MaybeDeadAccess to the exit.
+ if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
SmallPtrSet<BasicBlock *, 16> KillingBlocks;
for (Instruction *KD : KillingDefs)
KillingBlocks.insert(KD->getParent());
@@ -1619,25 +1570,24 @@ struct DSEState {
// Find the common post-dominator of all killing blocks.
BasicBlock *CommonPred = *KillingBlocks.begin();
- for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end();
- I != E; I++) {
+ for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
if (!CommonPred)
break;
- CommonPred = PDT.findNearestCommonDominator(CommonPred, *I);
+ CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
}
// If CommonPred is in the set of killing blocks, just check if it
- // post-dominates EarlierAccess.
+ // post-dominates MaybeDeadAccess.
if (KillingBlocks.count(CommonPred)) {
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock()))
- return {EarlierAccess};
+ if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock()))
+ return {MaybeDeadAccess};
return None;
}
- // If the common post-dominator does not post-dominate EarlierAccess,
- // there is a path from EarlierAccess to an exit not going through a
+ // If the common post-dominator does not post-dominate MaybeDeadAccess,
+ // there is a path from MaybeDeadAccess to an exit not going through a
// killing block.
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) {
+ if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
SetVector<BasicBlock *> WorkList;
// If CommonPred is null, there are multiple exits from the function.
@@ -1650,16 +1600,16 @@ struct DSEState {
NumCFGTries++;
// Check if all paths starting from an exit node go through one of the
- // killing blocks before reaching EarlierAccess.
+ // killing blocks before reaching MaybeDeadAccess.
for (unsigned I = 0; I < WorkList.size(); I++) {
NumCFGChecks++;
BasicBlock *Current = WorkList[I];
if (KillingBlocks.count(Current))
continue;
- if (Current == EarlierAccess->getBlock())
+ if (Current == MaybeDeadAccess->getBlock())
return None;
- // EarlierAccess is reachable from the entry, so we don't have to
+ // MaybeDeadAccess is reachable from the entry, so we don't have to
// explore unreachable blocks further.
if (!DT.isReachableFromEntry(Current))
continue;
@@ -1671,14 +1621,14 @@ struct DSEState {
return None;
}
NumCFGSuccess++;
- return {EarlierAccess};
+ return {MaybeDeadAccess};
}
return None;
}
- // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
+ // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
// potentially dead.
- return {EarlierAccess};
+ return {MaybeDeadAccess};
}
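// Editorial illustration (not part of this patch): why the CFG walk above is
// needed for objects still visible after the function returns. Hypothetical C:
//   void f(int *p, bool c) {
//     *p = 0;            // MaybeDeadAccess
//     if (c)
//       *p = 1;          // kills on the true path only
//   }                    // the false path exits with *p == 0 still observable
// The killing blocks do not cover every path to the exit, so *p = 0 stays.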
// Delete dead memory defs
@@ -1701,6 +1651,7 @@ struct DSEState {
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
SkipStores.insert(MD);
}
+
Updater.removeMemoryAccess(MA);
}
@@ -1715,47 +1666,49 @@ struct DSEState {
NowDeadInsts.push_back(OpI);
}
+ EI.removeInstruction(DeadInst);
DeadInst->eraseFromParent();
}
}
- // Check for any extra throws between SI and NI that block DSE. This only
- // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may
- // throw are handled during the walk from one def to the next.
- bool mayThrowBetween(Instruction *SI, Instruction *NI,
- const Value *SILocUnd) {
- // First see if we can ignore it by using the fact that SI is an
+ // Check for any extra throws between \p KillingI and \p DeadI that block
+ // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
+ // MemoryDefs that may throw are handled during the walk from one def to the
+ // next.
+ bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
+ const Value *KillingUndObj) {
+ // First see if we can ignore it by using the fact that KillingUndObj is an
// alloca/alloca-like object that is not visible to the caller during
// execution of the function.
- if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd))
+ if (KillingUndObj && isInvisibleToCallerBeforeRet(KillingUndObj))
return false;
- if (SI->getParent() == NI->getParent())
- return ThrowingBlocks.count(SI->getParent());
+ if (KillingI->getParent() == DeadI->getParent())
+ return ThrowingBlocks.count(KillingI->getParent());
return !ThrowingBlocks.empty();
}
- // Check if \p NI acts as a DSE barrier for \p SI. The following instructions
- // act as barriers:
- // * A memory instruction that may throw and \p SI accesses a non-stack
+ // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
+ // instructions act as barriers:
+ // * A memory instruction that may throw and \p KillingI accesses a non-stack
// object.
// * Atomic stores stronger than monotonic.
- bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) {
- // If NI may throw it acts as a barrier, unless we are to an alloca/alloca
- // like object that does not escape.
- if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd))
+ bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
+ // If DeadI may throw it acts as a barrier, unless the killing store is to
+ // an alloca/alloca-like object that does not escape.
+ if (DeadI->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj))
return true;
- // If NI is an atomic load/store stronger than monotonic, do not try to
+ // If DeadI is an atomic load/store stronger than monotonic, do not try to
// eliminate/reorder it.
- if (NI->isAtomic()) {
- if (auto *LI = dyn_cast<LoadInst>(NI))
+ if (DeadI->isAtomic()) {
+ if (auto *LI = dyn_cast<LoadInst>(DeadI))
return isStrongerThanMonotonic(LI->getOrdering());
- if (auto *SI = dyn_cast<StoreInst>(NI))
+ if (auto *SI = dyn_cast<StoreInst>(DeadI))
return isStrongerThanMonotonic(SI->getOrdering());
- if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI))
+ if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
return isStrongerThanMonotonic(ARMW->getOrdering());
- if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI))
+ if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
llvm_unreachable("other instructions should be skipped in MemorySSA");
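A sketch of the atomic case (illustrative C++; std::atomic stands in for the IR-level orderings): a load or store stronger than monotonic between the two stores blocks elimination and reordering.

    // Hypothetical example only; not part of this patch.
    #include <atomic>
    extern std::atomic<int> Flag;
    void atomicBarrier(int *P) {
      *P = 1;                                    // not eliminated across ...
      Flag.store(1, std::memory_order_release);  // ... this ordered store
      *P = 2;
    }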
@@ -1776,7 +1729,6 @@ struct DSEState {
continue;
Instruction *DefI = Def->getMemoryInst();
- SmallVector<const Value *, 4> Pointers;
auto DefLoc = getLocForWriteEx(DefI);
if (!DefLoc)
continue;
@@ -1787,7 +1739,7 @@ struct DSEState {
// uncommon. If it turns out to be important, we can use
// getUnderlyingObjects here instead.
const Value *UO = getUnderlyingObject(DefLoc->Ptr);
- if (!UO || !isInvisibleToCallerAfterRet(UO))
+ if (!isInvisibleToCallerAfterRet(UO))
continue;
if (isWriteAtEndOfFunction(Def)) {
@@ -1804,8 +1756,7 @@ struct DSEState {
/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
- bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc,
- const Value *DefUO) {
+ bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
MemSetInst *MemSet = dyn_cast<MemSetInst>(Def->getMemoryInst());
Constant *StoredConstant = nullptr;
@@ -1816,13 +1767,78 @@ struct DSEState {
if (StoredConstant && StoredConstant->isNullValue()) {
auto *DefUOInst = dyn_cast<Instruction>(DefUO);
- if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) {
- auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
- // If UnderlyingDef is the clobbering access of Def, no instructions
- // between them can modify the memory location.
- auto *ClobberDef =
- MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
- return UnderlyingDef == ClobberDef;
+ if (DefUOInst) {
+ if (isCallocLikeFn(DefUOInst, &TLI)) {
+ auto *UnderlyingDef =
+ cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
+ // If UnderlyingDef is the clobbering access of Def, no instructions
+ // between them can modify the memory location.
+ auto *ClobberDef =
+ MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
+ return UnderlyingDef == ClobberDef;
+ }
+
+ if (MemSet) {
+ if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
+ F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F.getName() == "calloc")
+ return false;
+ auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst));
+ if (!Malloc)
+ return false;
+ auto *InnerCallee = Malloc->getCalledFunction();
+ if (!InnerCallee)
+ return false;
+ LibFunc Func;
+ if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+ Func != LibFunc_malloc)
+ return false;
+
+ auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
+ // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
+ // of the malloc block.
+ auto *MallocBB = Malloc->getParent(),
+ *MemsetBB = Memset->getParent();
+ if (MallocBB == MemsetBB)
+ return true;
+ auto *Ptr = Memset->getArgOperand(0);
+ auto *TI = MallocBB->getTerminator();
+ ICmpInst::Predicate Pred;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
+ FalseBB)))
+ return false;
+ if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
+ return false;
+ return true;
+ };
+
+ if (Malloc->getOperand(0) == MemSet->getLength()) {
+ if (shouldCreateCalloc(Malloc, MemSet) &&
+ DT.dominates(Malloc, MemSet) &&
+ memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) {
+ IRBuilder<> IRB(Malloc);
+ const auto &DL = Malloc->getModule()->getDataLayout();
+ if (auto *Calloc =
+ emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1),
+ Malloc->getArgOperand(0), IRB, TLI)) {
+ MemorySSAUpdater Updater(&MSSA);
+ auto *LastDef = cast<MemoryDef>(
+ Updater.getMemorySSA()->getMemoryAccess(Malloc));
+ auto *NewAccess = Updater.createMemoryAccessAfter(
+ cast<Instruction>(Calloc), LastDef, LastDef);
+ auto *NewAccessMD = cast<MemoryDef>(NewAccess);
+ Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
+ Updater.removeMemoryAccess(Malloc);
+ Malloc->replaceAllUsesWith(Calloc);
+ Malloc->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+ }
+ }
}
}
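In source terms, the fold above targets the following shape (a sketch, not part of this patch; the shouldCreateCalloc lambda matches the corresponding br(icmp eq ptr, null) guard in IR):

    // Hypothetical example only.
    #include <cstdlib>
    #include <cstring>
    void *zeroedBuffer(size_t N) {
      void *P = malloc(N);  // malloc block ends in br(icmp eq P, null)
      if (P != nullptr)
        memset(P, 0, N);    // zero fill, same length as the malloc
      return P;             // after the fold: calloc(1, N), memset gone
    }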
@@ -1875,6 +1891,76 @@ struct DSEState {
return false;
}
+
+ bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
+ bool Changed = false;
+ for (auto OI : IOL) {
+ Instruction *DeadI = OI.first;
+ MemoryLocation Loc = *getLocForWriteEx(DeadI);
+ assert(isRemovable(DeadI) && "Expect only removable instruction");
+
+ const Value *Ptr = Loc.Ptr->stripPointerCasts();
+ int64_t DeadStart = 0;
+ uint64_t DeadSize = Loc.Size.getValue();
+ GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
+ OverlapIntervalsTy &IntervalMap = OI.second;
+ Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
+ if (IntervalMap.empty())
+ continue;
+ Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+ }
+ return Changed;
+ }
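A sketch of the shortening this enables (illustrative C++): when a later write covers only part of an earlier one, the earlier write is trimmed rather than removed.

    // Hypothetical example only; not part of this patch.
    #include <cstring>
    void shortenTail(char *Buf) {
      memset(Buf, 0, 32);       // trimmed to 16 bytes by tryToShortenEnd
      memset(Buf + 16, 1, 16);  // fully overwrites the tail [16, 32)
    }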
+
+ /// Eliminates writes to locations where the value that is being written
+ /// is already stored at the same location.
+ bool eliminateRedundantStoresOfExistingValues() {
+ bool MadeChange = false;
+ LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write an "
+ "already-existing value\n");
+ for (auto *Def : MemDefs) {
+ if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
+ !isRemovable(Def->getMemoryInst()))
+ continue;
+ auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
+ if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
+ continue;
+
+ Instruction *DefInst = Def->getMemoryInst();
+ Instruction *UpperInst = UpperDef->getMemoryInst();
+ auto IsRedundantStore = [this, DefInst,
+ UpperInst](MemoryLocation UpperLoc) {
+ if (DefInst->isIdenticalTo(UpperInst))
+ return true;
+ if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
+ if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
+ auto MaybeDefLoc = getLocForWriteEx(DefInst);
+ if (!MaybeDefLoc)
+ return false;
+ int64_t InstWriteOffset = 0;
+ int64_t DepWriteOffset = 0;
+ auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
+ InstWriteOffset, DepWriteOffset);
+ Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
+ return StoredByte && StoredByte == MemSetI->getOperand(1) &&
+ OR == OW_Complete;
+ }
+ }
+ return false;
+ };
+
+ auto MaybeUpperLoc = getLocForWriteEx(UpperInst);
+ if (!MaybeUpperLoc || !IsRedundantStore(*MaybeUpperLoc) ||
+ isReadClobber(*MaybeUpperLoc, DefInst))
+ continue;
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
+ << '\n');
+ deleteDeadInstruction(DefInst);
+ NumRedundantStores++;
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
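The memset/store case above corresponds to source like this sketch (illustrative): the stored byte pattern matches what the dominating memset already wrote to the same location.

    // Hypothetical example only; not part of this patch.
    #include <cstring>
    void redundantStore(char *Buf, unsigned N) {
      memset(Buf, 0, N);  // UpperDef: zeroes the whole buffer
      Buf[0] = 0;         // Def: same byte at the same location -> removable
    }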
};
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
@@ -1883,68 +1969,64 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
const LoopInfo &LI) {
bool MadeChange = false;
- DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI);
+ DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
if (State.SkipStores.count(KillingDef))
continue;
- Instruction *SI = KillingDef->getMemoryInst();
+ Instruction *KillingI = KillingDef->getMemoryInst();
- Optional<MemoryLocation> MaybeSILoc;
- if (State.isMemTerminatorInst(SI))
- MaybeSILoc = State.getLocForTerminator(SI).map(
+ Optional<MemoryLocation> MaybeKillingLoc;
+ if (State.isMemTerminatorInst(KillingI))
+ MaybeKillingLoc = State.getLocForTerminator(KillingI).map(
[](const std::pair<MemoryLocation, bool> &P) { return P.first; });
else
- MaybeSILoc = State.getLocForWriteEx(SI);
+ MaybeKillingLoc = State.getLocForWriteEx(KillingI);
- if (!MaybeSILoc) {
+ if (!MaybeKillingLoc) {
LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
- << *SI << "\n");
+ << *KillingI << "\n");
continue;
}
- MemoryLocation SILoc = *MaybeSILoc;
- assert(SILoc.Ptr && "SILoc should not be null");
- const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr);
-
- MemoryAccess *Current = KillingDef;
+ MemoryLocation KillingLoc = *MaybeKillingLoc;
+ assert(KillingLoc.Ptr && "KillingLoc should not be null");
+ const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
- << *Current << " (" << *SI << ")\n");
+ << *KillingDef << " (" << *KillingI << ")\n");
unsigned ScanLimit = MemorySSAScanLimit;
unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
unsigned PartialLimit = MemorySSAPartialStoreLimit;
// Worklist of MemoryAccesses that may be killed by KillingDef.
SetVector<MemoryAccess *> ToCheck;
-
- if (SILocUnd)
- ToCheck.insert(KillingDef->getDefiningAccess());
+ ToCheck.insert(KillingDef->getDefiningAccess());
bool Shortend = false;
- bool IsMemTerm = State.isMemTerminatorInst(SI);
+ bool IsMemTerm = State.isMemTerminatorInst(KillingI);
// Check if MemoryAccesses in the worklist are killed by KillingDef.
for (unsigned I = 0; I < ToCheck.size(); I++) {
- Current = ToCheck[I];
+ MemoryAccess *Current = ToCheck[I];
if (State.SkipStores.count(Current))
continue;
- Optional<MemoryAccess *> Next = State.getDomMemoryDef(
- KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
- IsMemTerm, PartialLimit);
+ Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
+ KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
+ WalkerStepLimit, IsMemTerm, PartialLimit);
- if (!Next) {
+ if (!MaybeDeadAccess) {
LLVM_DEBUG(dbgs() << " finished walk\n");
continue;
}
- MemoryAccess *EarlierAccess = *Next;
- LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess);
- if (isa<MemoryPhi>(EarlierAccess)) {
+ MemoryAccess *DeadAccess = *MaybeDeadAccess;
+ LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
+ if (isa<MemoryPhi>(DeadAccess)) {
LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
- for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) {
+ for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
BasicBlock *IncomingBlock = IncomingAccess->getBlock();
- BasicBlock *PhiBlock = EarlierAccess->getBlock();
+ BasicBlock *PhiBlock = DeadAccess->getBlock();
// We only consider incoming MemoryAccesses that come before the
// MemoryPhi. Otherwise we could discover candidates that do not
@@ -1955,72 +2037,73 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
}
continue;
}
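The MemoryPhi step corresponds to a diamond like this sketch (illustrative): each incoming def writes the location before the phi, so both stores become candidates on the worklist.

    // Hypothetical example only; not part of this patch.
    void diamond(int *P, bool C) {
      if (C)
        *P = 1;  // incoming def, dead
      else
        *P = 2;  // incoming def, dead
      *P = 3;    // killing store; its walk reaches the MemoryPhi first
    }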
- auto *NextDef = cast<MemoryDef>(EarlierAccess);
- Instruction *NI = NextDef->getMemoryInst();
- LLVM_DEBUG(dbgs() << " (" << *NI << ")\n");
- ToCheck.insert(NextDef->getDefiningAccess());
+ auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
+ Instruction *DeadI = DeadDefAccess->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
+ ToCheck.insert(DeadDefAccess->getDefiningAccess());
NumGetDomMemoryDefPassed++;
if (!DebugCounter::shouldExecute(MemorySSACounter))
continue;
- MemoryLocation NILoc = *State.getLocForWriteEx(NI);
+ MemoryLocation DeadLoc = *State.getLocForWriteEx(DeadI);
if (IsMemTerm) {
- const Value *NIUnd = getUnderlyingObject(NILoc.Ptr);
- if (SILocUnd != NIUnd)
+ const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
+ if (KillingUndObj != DeadUndObj)
continue;
- LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
- << "\n KILLER: " << *SI << '\n');
- State.deleteDeadInstruction(NI);
+ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
+ << "\n KILLER: " << *KillingI << '\n');
+ State.deleteDeadInstruction(DeadI);
++NumFastStores;
MadeChange = true;
} else {
- // Check if NI overwrites SI.
- int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = State.isOverwrite(SI, NI, SILoc, NILoc,
- DepWriteOffset, InstWriteOffset);
+ // Check if KillingI overwrites DeadI.
+ int64_t KillingOffset = 0;
+ int64_t DeadOffset = 0;
+ OverwriteResult OR = State.isOverwrite(
+ KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
if (OR == OW_MaybePartial) {
auto Iter = State.IOLs.insert(
std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
- NI->getParent(), InstOverlapIntervalsTy()));
+ DeadI->getParent(), InstOverlapIntervalsTy()));
auto &IOL = Iter.first->second;
- OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset,
- NI, IOL);
+ OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
+ DeadOffset, DeadI, IOL);
}
if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
- auto *Earlier = dyn_cast<StoreInst>(NI);
- auto *Later = dyn_cast<StoreInst>(SI);
+ auto *DeadSI = dyn_cast<StoreInst>(DeadI);
+ auto *KillingSI = dyn_cast<StoreInst>(KillingI);
// We are re-using tryToMergePartialOverlappingStores, which requires
- // Earlier to domiante Later.
+ // DeadSI to dominate KillingSI.
// TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
- if (Earlier && Later && DT.dominates(Earlier, Later)) {
+ if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
if (Constant *Merged = tryToMergePartialOverlappingStores(
- Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL,
+ KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
State.BatchAA, &DT)) {
// Update stored value of the dead store to the merged constant.
- Earlier->setOperand(0, Merged);
+ DeadSI->setOperand(0, Merged);
++NumModifiedStores;
MadeChange = true;
Shortend = true;
- // Remove later store and remove any outstanding overlap intervals
- // for the updated store.
- State.deleteDeadInstruction(Later);
- auto I = State.IOLs.find(Earlier->getParent());
+ // Remove killing store and remove any outstanding overlap
+ // intervals for the updated store.
+ State.deleteDeadInstruction(KillingSI);
+ auto I = State.IOLs.find(DeadSI->getParent());
if (I != State.IOLs.end())
- I->second.erase(Earlier);
+ I->second.erase(DeadSI);
break;
}
}
}
if (OR == OW_Complete) {
- LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
- << "\n KILLER: " << *SI << '\n');
- State.deleteDeadInstruction(NI);
+ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
+ << "\n KILLER: " << *KillingI << '\n');
+ State.deleteDeadInstruction(DeadI);
++NumFastStores;
MadeChange = true;
}
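For the OW_PartialEarlierWithFullLater path, a sketch of the merge (illustrative; it assumes earlier passes lower the small memcpy to a plain store before DSE runs): the later constant store lands entirely inside the earlier one, so the two constants are folded into the earlier store.

    // Hypothetical example only; not part of this patch.
    #include <cstring>
    void mergeConstants(unsigned *P) {
      *P = 0;                          // dead store; gets the merged constant
      unsigned short Half = 1;
      memcpy(P, &Half, sizeof(Half));  // killing store of bytes [0, 2)
    }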
@@ -2028,10 +2111,11 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
}
// Check if the store is a no-op.
- if (!Shortend && isRemovable(SI) &&
- State.storeIsNoop(KillingDef, SILoc, SILocUnd)) {
- LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n');
- State.deleteDeadInstruction(SI);
+ if (!Shortend && isRemovable(KillingI) &&
+ State.storeIsNoop(KillingDef, KillingUndObj)) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
+ << '\n');
+ State.deleteDeadInstruction(KillingI);
NumRedundantStores++;
MadeChange = true;
continue;
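The no-op check above covers sketches like this one (illustrative): storing zero into memory that calloc already zeroed, with no clobber in between.

    // Hypothetical example only; not part of this patch.
    #include <cstdlib>
    void noopStore(size_t N) {
      long *P = static_cast<long *>(calloc(N, sizeof(long)));
      if (P)
        P[0] = 0;  // no-op: calloc already zeroed this location
      free(P);
    }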
@@ -2040,8 +2124,9 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
if (EnablePartialOverwriteTracking)
for (auto &KV : State.IOLs)
- MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);
+ MadeChange |= State.removePartiallyOverlappedStores(KV.second);
+ MadeChange |= State.eliminateRedundantStoresOfExistingValues();
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
return MadeChange;
}