Diffstat (limited to 'lib/Transforms/Scalar/SROA.cpp')
-rw-r--r--   lib/Transforms/Scalar/SROA.cpp   153
1 file changed, 111 insertions(+), 42 deletions(-)
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index de16b608f752..eab77cf4cda9 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -913,8 +913,7 @@ private:
     if (!IsOffsetKnown)
       return PI.setAborted(&II);
 
-    if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
-        II.getIntrinsicID() == Intrinsic::lifetime_end) {
+    if (II.isLifetimeStartOrEnd()) {
       ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
       uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
                                Length->getLimitedValue());
@@ -1060,7 +1059,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
 
   // Sort the uses. This arranges for the offsets to be in ascending order,
   // and the sizes to be in descending order.
-  llvm::sort(Slices.begin(), Slices.end());
+  llvm::sort(Slices);
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1211,7 +1210,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) {
   // predecessor blocks. The only thing to watch out for is that we can't put
   // a possibly trapping load in the predecessor if it is a critical edge.
   for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
-    TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
+    Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
     Value *InVal = PN.getIncomingValue(Idx);
 
     // If the value is produced by the terminator of the predecessor (an
@@ -1275,7 +1274,7 @@ static void speculatePHINodeLoads(PHINode &PN) {
       continue;
     }
 
-    TerminatorInst *TI = Pred->getTerminator();
+    Instruction *TI = Pred->getTerminator();
     IRBuilderTy PredBuilder(TI);
 
     LoadInst *Load = PredBuilder.CreateLoad(
@@ -1400,8 +1399,8 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
   if (Ty == TargetTy)
     return buildGEP(IRB, BasePtr, Indices, NamePrefix);
 
-  // Pointer size to use for the indices.
-  unsigned PtrSize = DL.getPointerTypeSizeInBits(BasePtr->getType());
+  // Offset size to use for the indices.
+  unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType());
 
   // See if we can descend into a struct and locate a field with the correct
   // type.
@@ -1413,7 +1412,7 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
     if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
       ElementTy = ArrayTy->getElementType();
-      Indices.push_back(IRB.getIntN(PtrSize, 0));
+      Indices.push_back(IRB.getIntN(OffsetSize, 0));
     } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
       ElementTy = VectorTy->getElementType();
       Indices.push_back(IRB.getInt32(0));
@@ -1807,8 +1806,7 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
       if (!S.isSplittable())
         return false; // Skip any unsplittable intrinsics.
     } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
-      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
-          II->getIntrinsicID() != Intrinsic::lifetime_end)
+      if (!II->isLifetimeStartOrEnd())
         return false;
     } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
       // Disable vector promotion when there are loads or stores of an FCA.
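
Two recurring refactorings in the hunks above are mechanical. The new IntrinsicInst::isLifetimeStartOrEnd() predicate replaces the repeated two-intrinsic comparison; a minimal free-function sketch of the check it performs (simplified, not the verbatim definition from llvm/IR/IntrinsicInst.h):

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // Sketch: true iff the call is llvm.lifetime.start or llvm.lifetime.end.
    static bool isLifetimeStartOrEndSketch(const IntrinsicInst &II) {
      Intrinsic::ID ID = II.getIntrinsicID();
      return ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end;
    }

Likewise, llvm::sort(Slices) uses the range overload of llvm::sort, equivalent to llvm::sort(Slices.begin(), Slices.end()) but harder to misuse with mismatched iterators.
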
@@ -1906,7 +1904,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
              "All non-integer types eliminated!");
       return RHSTy->getNumElements() < LHSTy->getNumElements();
     };
-    llvm::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes);
+    llvm::sort(CandidateTys, RankVectorTypes);
     CandidateTys.erase(
         std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
         CandidateTys.end());
@@ -2029,8 +2027,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
       if (!S.isSplittable())
         return false; // Skip any unsplittable intrinsics.
     } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
-      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
-          II->getIntrinsicID() != Intrinsic::lifetime_end)
+      if (!II->isLifetimeStartOrEnd())
         return false;
     } else {
       return false;
@@ -2377,7 +2374,7 @@ private:
 #endif
 
     return getAdjustedPtr(IRB, DL, &NewAI,
-                          APInt(DL.getPointerTypeSizeInBits(PointerTy), Offset),
+                          APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
                           PointerTy,
 #ifndef NDEBUG
                           Twine(OldName) + "."
@@ -2593,7 +2590,8 @@ private:
     }
     V = convertValue(DL, IRB, V, NewAllocaTy);
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
-    Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       Store->setAAMetadata(AATags);
     Pass.DeadInsts.insert(&SI);
@@ -2662,7 +2660,8 @@ private:
       NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
                                      SI.isVolatile());
     }
-    NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       NewSI->setAAMetadata(AATags);
     if (SI.isVolatile())
@@ -2899,8 +2898,8 @@ private:
     unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
 
     // Compute the relative offset for the other pointer within the transfer.
-    unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
-    APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
+    unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
+    APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
     unsigned OtherAlign =
         IsDest ? II.getSourceAlignment() : II.getDestAlignment();
     OtherAlign = MinAlign(OtherAlign ? OtherAlign : 1,
@@ -3011,8 +3010,7 @@ private:
   }
 
   bool visitIntrinsicInst(IntrinsicInst &II) {
-    assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
-           II.getIntrinsicID() == Intrinsic::lifetime_end);
+    assert(II.isLifetimeStartOrEnd());
     LLVM_DEBUG(dbgs() << "    original: " << II << "\n");
     assert(II.getArgOperand(1) == OldPtr);
@@ -3046,6 +3044,42 @@ private:
     return true;
   }
 
+  void fixLoadStoreAlign(Instruction &Root) {
+    // This algorithm implements the same visitor loop as
+    // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
+    // or store found.
+    SmallPtrSet<Instruction *, 4> Visited;
+    SmallVector<Instruction *, 4> Uses;
+    Visited.insert(&Root);
+    Uses.push_back(&Root);
+    do {
+      Instruction *I = Uses.pop_back_val();
+
+      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+        unsigned LoadAlign = LI->getAlignment();
+        if (!LoadAlign)
+          LoadAlign = DL.getABITypeAlignment(LI->getType());
+        LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
+        continue;
+      }
+      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+        unsigned StoreAlign = SI->getAlignment();
+        if (!StoreAlign) {
+          Value *Op = SI->getOperand(0);
+          StoreAlign = DL.getABITypeAlignment(Op->getType());
+        }
+        SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+        continue;
+      }
+
+      assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
+             isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+      for (User *U : I->users())
+        if (Visited.insert(cast<Instruction>(U)).second)
+          Uses.push_back(cast<Instruction>(U));
+    } while (!Uses.empty());
+  }
+
   bool visitPHINode(PHINode &PN) {
     LLVM_DEBUG(dbgs() << "    original: " << PN << "\n");
     assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
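
The switch from the getPointerTypeSizeInBits/getPointerSizeInBits queries to their getIndexTypeSizeInBits/getIndexSizeInBits counterparts in the hunks above matters on targets whose pointers carry more bits than participate in addressing, i.e. where the datalayout's index width (the optional fourth field of a pointer spec) diverges from the pointer width. A hedged sketch using a hypothetical 128-bit fat-pointer layout ("p:128:128:128:64"):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/DataLayout.h"
    using namespace llvm;

    static void offsetWidthExample(const DataLayout &DL, unsigned AS) {
      unsigned PtrBits = DL.getPointerSizeInBits(AS); // 128: full pointer width
      unsigned IdxBits = DL.getIndexSizeInBits(AS);   // 64: width of GEP offsets
      // Offsets must be built at the index width so they match the arithmetic
      // GEP actually performs; hence APInt(IdxBits, ...) in this patch.
      APInt Offset(IdxBits, 16);
      (void)PtrBits;
      (void)Offset;
    }
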
@@ -3069,6 +3103,9 @@ private:
     LLVM_DEBUG(dbgs() << "          to: " << PN << "\n");
     deleteIfTriviallyDead(OldPtr);
 
+    // Fix the alignment of any loads or stores using this PHI node.
+    fixLoadStoreAlign(PN);
+
     // PHIs can't be promoted on their own, but often can be speculated. We
     // check the speculation outside of the rewriter so that we see the
     // fully-rewritten alloca.
@@ -3093,6 +3130,9 @@ private:
     LLVM_DEBUG(dbgs() << "          to: " << SI << "\n");
     deleteIfTriviallyDead(OldPtr);
 
+    // Fix the alignment of any loads or stores using this select.
+    fixLoadStoreAlign(SI);
+
     // Selects can't be promoted on their own, but often can be speculated. We
     // check the speculation outside of the rewriter so that we see the
     // fully-rewritten alloca.
@@ -3122,7 +3162,12 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
   /// value (as opposed to the user).
   Use *U;
 
+  /// Used to calculate offsets, and hence alignment, of subobjects.
+  const DataLayout &DL;
+
 public:
+  AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
+
   /// Rewrite loads and stores through a pointer and all pointers derived from
   /// it.
   bool rewrite(Instruction &I) {
@@ -3166,10 +3211,22 @@ private:
     /// split operations.
     Value *Ptr;
 
+    /// The base pointee type being GEPed into.
+    Type *BaseTy;
+
+    /// Known alignment of the base pointer.
+    unsigned BaseAlign;
+
+    /// To calculate offset of each component so we can correctly deduce
+    /// alignments.
+    const DataLayout &DL;
+
     /// Initialize the splitter with an insertion point, Ptr and start with a
     /// single zero GEP index.
-    OpSplitter(Instruction *InsertionPoint, Value *Ptr)
-        : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
+    OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+               unsigned BaseAlign, const DataLayout &DL)
+        : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr),
+          BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {}
 
   public:
     /// Generic recursive split emission routine.
@@ -3186,8 +3243,11 @@ private:
     /// \param Agg The aggregate value being built up or stored, depending on
     /// whether this is splitting a load or a store respectively.
     void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
-      if (Ty->isSingleValueType())
-        return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);
+      if (Ty->isSingleValueType()) {
+        unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
+        return static_cast<Derived *>(this)->emitFunc(
+            Ty, Agg, MinAlign(BaseAlign, Offset), Name);
+      }
 
       if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
         unsigned OldSize = Indices.size();
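
The BaseTy/BaseAlign/DL plumbing added to OpSplitter above exists so emitSplitOps can hand each leaf a correct alignment: the byte offset of the subobject is computed from the accumulated GEP indices and used to weaken the base alignment. The computation reduces to this self-contained sketch (alignForField is a hypothetical name for illustration):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/Support/MathExtras.h"
    using namespace llvm;

    // Hypothetical helper mirroring the patch's logic: a subobject is only as
    // aligned as its base, weakened by the subobject's byte offset.
    static uint64_t alignForField(const DataLayout &DL, Type *BaseTy,
                                  ArrayRef<Value *> GEPIndices,
                                  uint64_t BaseAlign) {
      uint64_t Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
      // MinAlign yields the largest power of two dividing both operands: a
      // 16-byte-aligned base with a field at offset 4 gives 4-byte alignment.
      return MinAlign(BaseAlign, Offset);
    }
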
@@ -3226,17 +3286,19 @@ private:
   struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
     AAMDNodes AATags;
 
-    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags)
-        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {}
+    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+                   AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
+                                     DL), AATags(AATags) {}
 
     /// Emit a leaf load of a single value. This is called at the leaves of the
     /// recursive emission to actually load values.
-    void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+    void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
       assert(Ty->isSingleValueType());
       // Load the single value and insert it using the indices.
       Value *GEP =
           IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
-      LoadInst *Load = IRB.CreateLoad(GEP, Name + ".load");
+      LoadInst *Load = IRB.CreateAlignedLoad(GEP, Align, Name + ".load");
       if (AATags)
         Load->setAAMetadata(AATags);
       Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
@@ -3253,7 +3315,8 @@ private:
     LLVM_DEBUG(dbgs() << "    original: " << LI << "\n");
     AAMDNodes AATags;
     LI.getAAMetadata(AATags);
-    LoadOpSplitter Splitter(&LI, *U, AATags);
+    LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,
+                            getAdjustedAlignment(&LI, 0, DL), DL);
     Value *V = UndefValue::get(LI.getType());
     Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
     LI.replaceAllUsesWith(V);
@@ -3262,13 +3325,15 @@ private:
   }
 
   struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
-    StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, AAMDNodes AATags)
-        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr), AATags(AATags) {}
+    StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
+                    AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL)
+        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
+                                      DL),
+          AATags(AATags) {}
     AAMDNodes AATags;
-
     /// Emit a leaf store of a single value. This is called at the leaves of the
     /// recursive emission to actually produce stores.
-    void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
+    void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) {
       assert(Ty->isSingleValueType());
       // Extract the single value and store it using the indices.
       //
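
Both splitters now seed that base alignment from getAdjustedAlignment(&I, 0, DL), a pre-existing SROA helper: it takes the load or store's explicit alignment, falls back to the ABI type alignment when none is recorded, and weakens the result by the given offset. A simplified sketch of that behavior, not the verbatim definition:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/MathExtras.h"
    using namespace llvm;

    // Sketch of getAdjustedAlignment's behavior for loads and stores.
    static uint64_t adjustedAlignmentSketch(Instruction *I, uint64_t Offset,
                                            const DataLayout &DL) {
      uint64_t Alignment = 0;
      if (auto *LI = dyn_cast<LoadInst>(I)) {
        Alignment = LI->getAlignment();
        if (!Alignment)
          Alignment = DL.getABITypeAlignment(LI->getType());
      } else if (auto *SI = dyn_cast<StoreInst>(I)) {
        Alignment = SI->getAlignment();
        if (!Alignment)
          Alignment = DL.getABITypeAlignment(SI->getValueOperand()->getType());
      }
      return MinAlign(Alignment, Offset);
    }
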
@@ -3278,7 +3343,8 @@ private:
           IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
       Value *InBoundsGEP =
           IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
-      StoreInst *Store = IRB.CreateStore(ExtractValue, InBoundsGEP);
+      StoreInst *Store =
+          IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Align);
       if (AATags)
         Store->setAAMetadata(AATags);
       LLVM_DEBUG(dbgs() << "          to: " << *Store << "\n");
@@ -3296,7 +3362,8 @@ private:
     LLVM_DEBUG(dbgs() << "    original: " << SI << "\n");
     AAMDNodes AATags;
     SI.getAAMetadata(AATags);
-    StoreOpSplitter Splitter(&SI, *U, AATags);
+    StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
+                             getAdjustedAlignment(&SI, 0, DL), DL);
     Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
     SI.eraseFromParent();
     return true;
@@ -3730,7 +3797,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
                                 PartPtrTy, BasePtr->getName() + "."),
           getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
           LI->getName());
-      PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+                                LLVMContext::MD_access_group});
 
       // Append this load onto the list of split loads so we can find it later
       // to rewrite the stores.
@@ -3786,7 +3854,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
                                APInt(DL.getIndexSizeInBits(AS), PartOffset),
                                PartPtrTy, StoreBasePtr->getName() + "."),
           getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
-      PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+      PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+                                 LLVMContext::MD_access_group});
       LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
     }
 
@@ -4179,7 +4248,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
   }
 
   if (!IsSorted)
-    llvm::sort(AS.begin(), AS.end());
+    llvm::sort(AS);
 
   /// Describes the allocas introduced by rewritePartition in order to migrate
   /// the debug info.
@@ -4212,7 +4281,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
 
   // Migrate debug information from the old alloca to the new alloca(s)
   // and the individual partitions.
-  TinyPtrVector<DbgInfoIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
+  TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
   if (!DbgDeclares.empty()) {
     auto *Var = DbgDeclares.front()->getVariable();
     auto *Expr = DbgDeclares.front()->getExpression();
@@ -4264,7 +4333,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
     }
 
     // Remove any existing intrinsics describing the same alloca.
-    for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
+    for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca))
       OldDII->eraseFromParent();
 
     DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
@@ -4314,7 +4383,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
 
   // First, split any FCA loads and stores touching this alloca to promote
   // better splitting and promotion opportunities.
-  AggLoadStoreRewriter AggRewriter;
+  AggLoadStoreRewriter AggRewriter(DL);
   Changed |= AggRewriter.rewrite(AI);
 
   // Build the slices using a recursive instruction-visiting builder.
@@ -4379,7 +4448,7 @@ bool SROA::deleteDeadInstructions(
     // not be able to find it.
     if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
       DeletedAllocas.insert(AI);
-      for (DbgInfoIntrinsic *OldDII : FindDbgAddrUses(AI))
+      for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(AI))
         OldDII->eraseFromParent();
     }
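
Two broader API notes on the hunks above. Instruction::copyMetadata with an explicit kind list copies only the listed kinds, so adding MD_access_group keeps llvm.access.group metadata, which parallel-loop analysis and the vectorizer consume, alive on the split loads and stores instead of silently dropping it. And FindDbgAddrUses now returns the narrower DbgVariableIntrinsic, whose variable and expression accessors the debug-info migration relies on. A small usage sketch under those signatures:

    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static void migrateDebugInfoSketch(AllocaInst &AI) {
      // Only dbg.declare/dbg.addr users of the alloca are returned; the
      // element type guarantees the accessors below exist.
      for (DbgVariableIntrinsic *DVI : FindDbgAddrUses(&AI)) {
        DILocalVariable *Var = DVI->getVariable();
        DIExpression *Expr = DVI->getExpression();
        (void)Var;
        (void)Expr;
      }
    }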
