Diffstat (limited to 'lib/Transforms')
-rw-r--r-- lib/Transforms/Scalar/LoopSink.cpp         |  8
-rw-r--r-- lib/Transforms/Scalar/SROA.cpp             | 42
-rw-r--r-- lib/Transforms/Utils/CloneFunction.cpp     | 23
-rw-r--r-- lib/Transforms/Vectorize/LoopVectorize.cpp |  7
4 files changed, 73 insertions(+), 7 deletions(-)
diff --git a/lib/Transforms/Scalar/LoopSink.cpp b/lib/Transforms/Scalar/LoopSink.cpp
index 760177c9c5e9..7d62349d4719 100644
--- a/lib/Transforms/Scalar/LoopSink.cpp
+++ b/lib/Transforms/Scalar/LoopSink.cpp
@@ -152,6 +152,14 @@ findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
}
}
+ // Can't sink into blocks that have no valid insertion point.
+ for (BasicBlock *BB : BBsToSinkInto) {
+ if (BB->getFirstInsertionPt() == BB->end()) {
+ BBsToSinkInto.clear();
+ break;
+ }
+ }
+
// If the total frequency of BBsToSinkInto is larger than preheader frequency,
// do not sink.
if (adjustedSumFreq(BBsToSinkInto, BFI) >
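
A minimal IR sketch of the block shape this guards against (function and label names hypothetical): a catchswitch is both an EH pad and a terminator, so a block consisting of nothing but a catchswitch offers no spot for a sunk instruction, and getFirstInsertionPt() returns end().

define void @f() personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
entry:
  invoke void @g() to label %cont unwind label %dispatch
dispatch:
  ; The only instruction is the catchswitch terminator, so
  ; getFirstInsertionPt() == end() and sinking here is impossible.
  %cs = catchswitch within none [label %handler] unwind to caller
handler:
  %cp = catchpad within %cs [i8* null, i32 64, i8* null]
  catchret from %cp to label %cont
cont:
  ret void
}
declare void @g()
declare i32 @__CxxFrameHandler3(...)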
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index de16b608f752..bf482bf5272e 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3046,6 +3046,42 @@ private:
return true;
}
+ void fixLoadStoreAlign(Instruction &Root) {
+ // This algorithm implements the same visitor loop as
+ // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
+ // or store found.
+ SmallPtrSet<Instruction *, 4> Visited;
+ SmallVector<Instruction *, 4> Uses;
+ Visited.insert(&Root);
+ Uses.push_back(&Root);
+ do {
+ Instruction *I = Uses.pop_back_val();
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ unsigned LoadAlign = LI->getAlignment();
+ if (!LoadAlign)
+ LoadAlign = DL.getABITypeAlignment(LI->getType());
+ LI->setAlignment(std::min(LoadAlign, getSliceAlign()));
+ continue;
+ }
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ unsigned StoreAlign = SI->getAlignment();
+ if (!StoreAlign) {
+ Value *Op = SI->getOperand(0);
+ StoreAlign = DL.getABITypeAlignment(Op->getType());
+ }
+ SI->setAlignment(std::min(StoreAlign, getSliceAlign()));
+ continue;
+ }
+
+ assert(isa<BitCastInst>(I) || isa<PHINode>(I) ||
+ isa<SelectInst>(I) || isa<GetElementPtrInst>(I));
+ for (User *U : I->users())
+ if (Visited.insert(cast<Instruction>(U)).second)
+ Uses.push_back(cast<Instruction>(U));
+ } while (!Uses.empty());
+ }
+
bool visitPHINode(PHINode &PN) {
LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
@@ -3069,6 +3105,9 @@ private:
LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
+ // Fix the alignment of any loads or stores using this PHI node.
+ fixLoadStoreAlign(PN);
+
// PHIs can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
@@ -3093,6 +3132,9 @@ private:
LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
+ // Fix the alignment of any loads or stores using this select.
+ fixLoadStoreAlign(SI);
+
// Selects can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
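
A hedged sketch of the failure mode being fixed (names hypothetical; the select case is shown, the PHI case is analogous): after the rewriter points one side of a select at a new, smaller slice alloca, a load through the select can keep a stale alignment inherited from the original alloca. fixLoadStoreAlign clamps it to the slice alignment.

define i32 @f(i1 %c, i32* %other) {
entry:
  ; New slice alloca produced by the rewriter; only align 4 is guaranteed.
  %slice = alloca i32, align 4
  store i32 0, i32* %slice, align 4
  %p = select i1 %c, i32* %slice, i32* %other
  ; Stale align 8 inherited from the original alloca; the fix clamps it
  ; to min(8, getSliceAlign()) = align 4.
  %v = load i32, i32* %p, align 8
  ret i32 %v
}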
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 807360340055..9ae60962a631 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -636,6 +636,22 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
Function::iterator I = Begin;
while (I != NewFunc->end()) {
+ // We need to simplify conditional branches and switches with a constant
+ // operand. We try to prune these out when cloning, but if the
+ // simplification required looking through PHI nodes, those are only
+ // available after forming the full basic block. That may leave some here,
+ // and we still want to prune the dead code as early as possible.
+ //
+ // Do the folding before we check if the block is dead since we want code
+ // like
+ // bb:
+ // br i1 undef, label %bb, label %bb
+ // to be simplified to
+ // bb:
+ // br label %bb
+ // before we call I->getSinglePredecessor().
+ ConstantFoldTerminator(&*I);
+
// Check if this block has become dead during inlining or other
// simplifications. Note that the first block will appear dead, as it has
// not yet been wired up properly.
@@ -646,13 +662,6 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
continue;
}
- // We need to simplify conditional branches and switches with a constant
- // operand. We try to prune these out when cloning, but if the
- // simplification required looking through PHI nodes, those are only
- // available after forming the full basic block. That may leave some here,
- // and we still want to prune the dead code as early as possible.
- ConstantFoldTerminator(&*I);
-
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
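
A small IR sketch of why the fold must run first (names hypothetical): with the conditional self-branch, %bb has two predecessor edges from itself, so getSinglePredecessor() returns null and the dead-block check cannot recognize it as unreachable. Once ConstantFoldTerminator collapses it to an unconditional branch, %bb's single predecessor is itself and the block gets pruned.

define void @f() {
entry:
  ret void
bb:
  ; Two edges from %bb to itself defeat getSinglePredecessor();
  ; ConstantFoldTerminator folds this to "br label %bb".
  br i1 undef, label %bb, label %bb
}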
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 859d0c92ca5a..1c7d0a63a5ca 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4510,6 +4510,13 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) {
for (auto OV : I->operand_values()) {
if (isOutOfScope(OV))
continue;
+ // First order recurrence Phi's should typically be considered
+ // non-uniform.
+ auto *OP = dyn_cast<PHINode>(OV);
+ if (OP && Legal->isFirstOrderRecurrence(OP))
+ continue;
+ // If all the users of the operand are uniform, then add the
+ // operand into the uniform worklist.
auto *OI = cast<Instruction>(OV);
if (llvm::all_of(OI->users(), [&](User *U) -> bool {
auto *J = cast<Instruction>(U);
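
A hedged IR sketch of a first-order recurrence (names hypothetical): %last carries the previous iteration's %cur, so each vector lane needs a different past value. Even when all of its users would otherwise qualify as uniform, such a PHI has to stay out of the uniform worklist.

define void @f(i32* %a, i32* %b, i64 %n) {
entry:
  br label %loop
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  ; First-order recurrence: %last is %cur from the previous iteration.
  %last = phi i32 [ 0, %entry ], [ %cur, %loop ]
  %p = getelementptr inbounds i32, i32* %a, i64 %i
  %cur = load i32, i32* %p, align 4
  %sum = add i32 %cur, %last
  %q = getelementptr inbounds i32, i32* %b, i64 %i
  store i32 %sum, i32* %q, align 4
  %i.next = add nuw nsw i64 %i, 1
  %done = icmp eq i64 %i.next, %n
  br i1 %done, label %exit, label %loop
exit:
  ret void
}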