Diffstat (limited to 'lib/Transforms/Scalar/SROA.cpp')
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 153
1 file changed, 111 insertions(+), 42 deletions(-)
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index de16b608f752..eab77cf4cda9 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -913,8 +913,7 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&II);
- if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
- II.getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (II.isLifetimeStartOrEnd()) {
ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
Length->getLimitedValue());
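
The two-way intrinsic-ID comparison is folded into IntrinsicInst::isLifetimeStartOrEnd() here and in several hunks below. A minimal sketch of the equivalence, assuming an IntrinsicInst &II is in scope:

    // Old spelling: name both lifetime intrinsic IDs explicitly.
    bool WasLifetime = II.getIntrinsicID() == Intrinsic::lifetime_start ||
                       II.getIntrinsicID() == Intrinsic::lifetime_end;
    // New spelling: one self-documenting helper on IntrinsicInst.
    bool IsLifetime = II.isLifetimeStartOrEnd();
    assert(WasLifetime == IsLifetime && "helper mirrors the open-coded check");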
@@ -1060,7 +1059,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
- llvm::sort(Slices.begin(), Slices.end());
+ llvm::sort(Slices);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
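
llvm::sort (from llvm/ADT/STLExtras.h) has a range overload, so the begin()/end() pair is redundant; the comparator form used in a later hunk works the same way. A quick sketch with an illustrative vector:

    llvm::SmallVector<int, 8> Vals = {3, 1, 2};
    llvm::sort(Vals);                       // whole range, uses operator<
    llvm::sort(Vals, std::greater<int>());  // whole range, custom comparator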
@@ -1211,7 +1210,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
- TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
+ Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
// If the value is produced by the terminator of the predecessor (an
@@ -1275,7 +1274,7 @@ static void speculatePHINodeLoads(PHINode &PN) {
continue;
}
- TerminatorInst *TI = Pred->getTerminator();
+ Instruction *TI = Pred->getTerminator();
IRBuilderTy PredBuilder(TI);
LoadInst *Load = PredBuilder.CreateLoad(
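
These two hunks track the phasing-out of the TerminatorInst class: the predecessor's terminator is now held as a plain Instruction *, and terminator-ness, where it matters, is queried via Instruction::isTerminator(). A sketch of the pattern, assuming a BasicBlock *Pred:

    Instruction *TI = Pred->getTerminator();
    assert(TI && TI->isTerminator() && "well-formed blocks end in a terminator");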
@@ -1400,8 +1399,8 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
if (Ty == TargetTy)
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
- // Pointer size to use for the indices.
- unsigned PtrSize = DL.getPointerTypeSizeInBits(BasePtr->getType());
+ // Offset size to use for the indices.
+ unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType());
// See if we can descend into a struct and locate a field with the correct
// type.
@@ -1413,7 +1412,7 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
ElementTy = ArrayTy->getElementType();
- Indices.push_back(IRB.getIntN(PtrSize, 0));
+ Indices.push_back(IRB.getIntN(OffsetSize, 0));
} else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
ElementTy = VectorTy->getElementType();
Indices.push_back(IRB.getInt32(0));
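
The width used for GEP index arithmetic now comes from DataLayout::getIndexTypeSizeInBits rather than the pointer size; the two can differ on targets whose pointers carry non-address bits. A sketch of sizing an offset APInt at the right width, assuming a pointer Value *Ptr and a DataLayout DL:

    unsigned IdxWidth = DL.getIndexTypeSizeInBits(Ptr->getType());
    APInt Offset(IdxWidth, 0); // accumulate offsets at index width, not pointer width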
@@ -1807,8 +1806,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
} else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
// Disable vector promotion when there are loads or stores of an FCA.
@@ -1906,7 +1904,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
"All non-integer types eliminated!");
return RHSTy->getNumElements() < LHSTy->getNumElements();
};
- llvm::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes);
+ llvm::sort(CandidateTys, RankVectorTypes);
CandidateTys.erase(
std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
CandidateTys.end());
@@ -2029,8 +2027,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end)
+ if (!II->isLifetimeStartOrEnd())
return false;
} else {
return false;
@@ -2377,7 +2374,7 @@ private:
#endif
return getAdjustedPtr(IRB, DL, &NewAI,
- APInt(DL.getPointerTypeSizeInBits(PointerTy), Offset),
+ APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
PointerTy,
#ifndef NDEBUG
Twine(OldName) + "."
@@ -2593,7 +2590,8 @@ private:
}
V = convertValue(DL, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
- Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+ Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
if (AATags)
Store->setAAMetadata(AATags);
Pass.DeadInsts.insert(&SI);
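
Instruction::copyMetadata accepts an ArrayRef of metadata kind IDs, so a rewritten store can keep both the legacy llvm.mem.parallel_loop_access marker and the newer llvm.access.group metadata in one call. Usage sketch with illustrative names NewStore/OldStore:

    NewStore->copyMetadata(*OldStore, {LLVMContext::MD_mem_parallel_loop_access,
                                       LLVMContext::MD_access_group});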
@@ -2662,7 +2660,8 @@ private:
NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
SI.isVolatile());
}
- NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+ NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
if (AATags)
NewSI->setAAMetadata(AATags);
if (SI.isVolatile())
@@ -2899,8 +2898,8 @@ private:
unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
// Compute the relative offset for the other pointer within the transfer.
- unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
- APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
+ unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
+ APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
unsigned OtherAlign =
IsDest ? II.getSourceAlignment() : II.getDestAlignment();
OtherAlign = MinAlign(OtherAlign ? OtherAlign : 1,
@@ -3011,8 +3010,7 @@ private:
}
bool visitIntrinsicInst(IntrinsicInst &II) {
- assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
- II.getIntrinsicID() == Intrinsic::lifetime_end);
+ assert(II.isLifetimeStartOrEnd());
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getArgOperand(1) == OldPtr);
@@ -3046,6 +3044,42 @@ private:
return true;
}
+ void fixLoadStoreAlign(Instruction &Root) {
+ // This algorithm implements the same visitor loop as
+ // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
+ // or store found.
+ SmallPtrSet<Instruction *, 4> Visited;
+ SmallVector<Instruction *, 4> Uses;
+ Visited.insert(&Root);
+ Uses.push_back(&Root);
+ do {
+ Instruction *I = Uses.pop_back_val();
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ unsigned LoadAlign = LI->getAlignment();
+ if (!LoadAlign)
+ LoadAlign = DL.getABITypeAlignment(LI->getType());
+ LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
+ continue;
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ unsigned StoreAlign = SI->getAlignment();
+ if (!StoreAlign) {
+ Value *Op = SI->getOperand(0);
+ StoreAlign = DL.getABITypeAlignment(Op->getType());
+ }
+ SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+ continue;
+ }
+
+ assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+ for (User *U : I->users())
+ if (Visited.insert(cast<Instruction>(U)).second)
+ Uses.push_back(cast<Instruction>(U));
+ } while (!Uses.empty());
+ }
+
bool visitPHINode(PHINode &PN) {
LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
@@ -3069,6 +3103,9 @@ private:
LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
+ // Fix the alignment of any loads or stores using this PHI node.
+ fixLoadStoreAlign(PN);
+
// PHIs can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
@@ -3093,6 +3130,9 @@ private:
LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
+ // Fix the alignment of any loads or stores using this select.
+ fixLoadStoreAlign(SI);
+
// Selects can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
@@ -3122,7 +3162,12 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
/// value (as opposed to the user).
Use *U;
+ /// Used to calculate offsets, and hence alignment, of subobjects.
+ const DataLayout &DL;
+
public:
+ AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
+
/// Rewrite loads and stores through a pointer and all pointers derived from
/// it.
bool rewrite(Instruction &I) {
@@ -3166,10 +3211,22 @@ private:
/// split operations.
Value *Ptr;
+ /// The base pointee type being GEPed into.
+ Type *BaseTy;
+
+ /// Known alignment of the base pointer.
+ unsigned BaseAlign;
+
+ /// To calculate offset of each component so we can correctly deduce
+ /// alignments.
+ const DataLayout &DL;
+
/// Initialize the splitter with an insertion point, Ptr and start with a
/// single zero GEP index.
- OpSplitter(Instruction *InsertionPoint, Value *Ptr)
- : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
+ OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+ unsigned BaseAlign, const DataLayout &DL)
+ : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr),
+ BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {}
public:
/// Generic recursive split emission routine.
@@ -3186,8 +3243,11 @@ private:
/// \param Agg The aggregate value being built up or stored, depending on
/// whether this is splitting a load or a store respectively.
void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
- if (Ty->isSingleValueType())
- return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);
+ if (Ty->isSingleValueType()) {
+ unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
+ return static_cast<Derived *>(this)->emitFunc(
+ Ty, Agg, MinAlign(BaseAlign, Offset), Name);
+ }
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned OldSize = Indices.size();
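
At each scalar leaf the splitter now derives a conservative alignment from the aggregate's base alignment and the leaf's byte offset: MinAlign keeps the largest power of two dividing both, so a base align of 8 with a leaf at offset 4 yields 4, while offset 0 keeps the full 8. A sketch under the surrounding definitions:

    // Byte offset of the leaf selected by GEPIndices within BaseTy.
    int64_t Off = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
    // Largest alignment guaranteed for a BaseAlign-aligned base plus Off bytes.
    uint64_t LeafAlign = MinAlign(BaseAlign, Off);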
@@ -3226,17 +3286,19 @@ private:
struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
AAMDNodes AATags;
- LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags)
- : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {}
+ LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+ AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+ : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
+ DL), AATags(AATags) {}
/// Emit a leaf load of a single value. This is called at the leaves of the
/// recursive emission to actually load values.
- void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+ void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
Value *GEP =
IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
- LoadInst *Load = IRB.CreateLoad(GEP, Name + ".load");
+ LoadInst *Load = IRB.CreateAlignedLoad(GEP, Align, Name + ".load");
if (AATags)
Load->setAAMetadata(AATags);
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
@@ -3253,7 +3315,8 @@ private:
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
AAMDNodes AATags;
LI.getAAMetadata(AATags);
- LoadOpSplitter Splitter(&LI, *U, AATags);
+ LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,
+ getAdjustedAlignment(&LI, 0, DL), DL);
Value *V = UndefValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
LI.replaceAllUsesWith(V);
@@ -3262,13 +3325,15 @@ private:
}
struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
- StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags)
- : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {}
+ StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+ AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+ : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
+ DL),
+ AATags(AATags) {}
AAMDNodes AATags;
-
/// Emit a leaf store of a single value. This is called at the leaves of the
/// recursive emission to actually produce stores.
- void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+ void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
assert(Ty->isSingleValueType());
// Extract the single value and store it using the indices.
//
@@ -3278,7 +3343,8 @@ private:
IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
Value *InBoundsGEP =
IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
- StoreInst *Store = IRB.CreateStore(ExtractValue, InBoundsGEP);
+ StoreInst *Store =
+ IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);
if (AATags)
Store->setAAMetadata(AATags);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
@@ -3296,7 +3362,8 @@ private:
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
AAMDNodes AATags;
SI.getAAMetadata(AATags);
- StoreOpSplitter Splitter(&SI, *U, AATags);
+ StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
+ getAdjustedAlignment(&SI, 0, DL), DL);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
SI.eraseFromParent();
return true;
@@ -3730,7 +3797,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
- PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+ PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
// Append this load onto the list of split loads so we can find it later
// to rewrite the stores.
@@ -3786,7 +3854,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
- PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+ PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group});
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
@@ -4179,7 +4248,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
if (!IsSorted)
- llvm::sort(AS.begin(), AS.end());
+ llvm::sort(AS);
/// Describes the allocas introduced by rewritePartition in order to migrate
/// the debug info.
@@ -4212,7 +4281,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
- TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
+ TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
if (!DbgDeclares.empty()) {
auto *Var = DbgDeclares.front()->getVariable();
auto *Expr = DbgDeclares.front()->getExpression();
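
DbgInfoIntrinsic has been split so that the variable-location intrinsics (llvm.dbg.declare/addr/value) get their own DbgVariableIntrinsic subclass, which is where getVariable() and getExpression() live; FindDbgAddrUses now returns that narrower type. A sketch of the lookup, assuming an AllocaInst AI:

    for (DbgVariableIntrinsic *DVI : FindDbgAddrUses(&AI)) {
      DILocalVariable *Var = DVI->getVariable();  // the source-level variable
      DIExpression *Expr = DVI->getExpression();  // its location expression
    }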
@@ -4264,7 +4333,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
// Remove any existing intrinsics describing the same alloca.
- for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
+ for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
OldDII->eraseFromParent();
DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
@@ -4314,7 +4383,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
- AggLoadStoreRewriter AggRewriter;
+ AggLoadStoreRewriter AggRewriter(DL);
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
@@ -4379,7 +4448,7 @@ bool SROA::deleteDeadInstructions(
// not be able to find it.
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
DeletedAllocas.insert(AI);
- for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(AI))
+ for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(AI))
OldDII->eraseFromParent();
}