diff options
Diffstat (limited to 'lib/Transforms')
-rw-r--r-- | lib/Transforms/Scalar/LoopSink.cpp | 8 | ||||
-rw-r--r-- | lib/Transforms/Scalar/SROA.cpp | 42 | ||||
-rw-r--r-- | lib/Transforms/Utils/CloneFunction.cpp | 23 | ||||
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 7 |
4 files changed, 73 insertions, 7 deletions
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp index 760177c9c5e9..7d62349d4719 100644 --- a/lib/Transforms/Scalar/LoopSink.cpp +++ b/lib/Transforms/Scalar/LoopSink.cpp @@ -152,6 +152,14 @@ findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs, } } + // Can't sink into blocks that have no valid insertion point. + for (BasicBlock *BB : BBsToSinkInto) { + if (BB->getFirstInsertionPt() == BB->end()) { + BBsToSinkInto.clear(); + break; + } + } + // If the total frequency of BBsToSinkInto is larger than preheader frequency, // do not sink. if (adjustedSumFreq(BBsToSinkInto, BFI) > diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index de16b608f752..bf482bf5272e 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -3046,6 +3046,42 @@ private: return true; } + void fixLoadStoreAlign(Instruction &Root) { + // This algorithm implements the same visitor loop as + // hasUnsafePHIOrSelectUse, and fixes the alignment of each load + // or store found. + SmallPtrSet<Instruction *, 4> Visited; + SmallVector<Instruction *, 4> Uses; + Visited.insert(&Root); + Uses.push_back(&Root); + do { + Instruction *I = Uses.pop_back_val(); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + unsigned LoadAlign = LI->getAlignment(); + if (!LoadAlign) + LoadAlign = DL.getABITypeAlignment(LI->getType()); + LI->setAlignment(std::min(LoadAlign, getSliceAlign())); + continue; + } + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + unsigned StoreAlign = SI->getAlignment(); + if (!StoreAlign) { + Value *Op = SI->getOperand(0); + StoreAlign = DL.getABITypeAlignment(Op->getType()); + } + SI->setAlignment(std::min(StoreAlign, getSliceAlign())); + continue; + } + + assert(isa<BitCastInst>(I) || isa<PHINode>(I) || + isa<SelectInst>(I) || isa<GetElementPtrInst>(I)); + for (User *U : I->users()) + if (Visited.insert(cast<Instruction>(U)).second) + Uses.push_back(cast<Instruction>(U)); + } while (!Uses.empty()); + } + bool visitPHINode(PHINode &PN) { LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable"); @@ -3069,6 +3105,9 @@ private: LLVM_DEBUG(dbgs() << " to: " << PN << "\n"); deleteIfTriviallyDead(OldPtr); + // Fix the alignment of any loads or stores using this PHI node. + fixLoadStoreAlign(PN); + // PHIs can't be promoted on their own, but often can be speculated. We // check the speculation outside of the rewriter so that we see the // fully-rewritten alloca. @@ -3093,6 +3132,9 @@ private: LLVM_DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldPtr); + // Fix the alignment of any loads or stores using this select. + fixLoadStoreAlign(SI); + // Selects can't be promoted on their own, but often can be speculated. We // check the speculation outside of the rewriter so that we see the // fully-rewritten alloca. diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 807360340055..9ae60962a631 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -636,6 +636,22 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { + // We need to simplify conditional branches and switches with a constant + // operand. We try to prune these out when cloning, but if the + // simplification required looking through PHI nodes, those are only + // available after forming the full basic block. That may leave some here, + // and we still want to prune the dead code as early as possible. + // + // Do the folding before we check if the block is dead since we want code + // like + // bb: + // br i1 undef, label %bb, label %bb + // to be simplified to + // bb: + // br label %bb + // before we call I->getSinglePredecessor(). + ConstantFoldTerminator(&*I); + // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. @@ -646,13 +662,6 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, continue; } - // We need to simplify conditional branches and switches with a constant - // operand. We try to prune these out when cloning, but if the - // simplification required looking through PHI nodes, those are only - // available after forming the full basic block. That may leave some here, - // and we still want to prune the dead code as early as possible. - ConstantFoldTerminator(&*I); - BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 859d0c92ca5a..1c7d0a63a5ca 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4510,6 +4510,13 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { for (auto OV : I->operand_values()) { if (isOutOfScope(OV)) continue; + // First order recurrence Phi's should typically be considered + // non-uniform. + auto *OP = dyn_cast<PHINode>(OV); + if (OP && Legal->isFirstOrderRecurrence(OP)) + continue; + // If all the users of the operand are uniform, then add the + // operand into the uniform worklist. auto *OI = cast<Instruction>(OV); if (llvm::all_of(OI->users(), [&](User *U) -> bool { auto *J = cast<Instruction>(U); |