author    Dimitry Andric <dim@FreeBSD.org>    2022-07-14 18:50:02 +0000
committer Dimitry Andric <dim@FreeBSD.org>    2022-07-14 18:50:02 +0000
commit    1f917f69ff07f09b6dbb670971f57f8efe718b84 (patch)
tree      99293cbc1411737cd995dac10a99b2c40ef0944c /llvm/lib/Transforms
parent    145449b1e420787bb99721a429341fa6be3adfb6 (diff)
Diffstat (limited to 'llvm/lib/Transforms')
 llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 49
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 2
 llvm/lib/Transforms/IPO/Attributor.cpp | 12
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 173
 llvm/lib/Transforms/IPO/GlobalOpt.cpp | 6
 llvm/lib/Transforms/IPO/IROutliner.cpp | 34
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 9
 llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 177
 llvm/lib/Transforms/IPO/SampleContextTracker.cpp | 2
 llvm/lib/Transforms/IPO/SampleProfile.cpp | 4
 llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 105
 llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 13
 llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp | 8
 llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 7
 llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 9
 llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 13
 llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 42
 llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 4
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 16
 llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 22
 llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 4
 llvm/lib/Transforms/Instrumentation/CGProfile.cpp | 3
 llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 147
 llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 1
 llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 2
 llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 4
 llvm/lib/Transforms/Scalar/GVN.cpp | 25
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 60
 llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp | 16
 llvm/lib/Transforms/Scalar/JumpThreading.cpp | 17
 llvm/lib/Transforms/Scalar/LICM.cpp | 3
 llvm/lib/Transforms/Scalar/LoopDistribute.cpp | 4
 llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 4
 llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 29
 llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | 8
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 14
 llvm/lib/Transforms/Scalar/Reassociate.cpp | 19
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 7
 llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp | 8
 llvm/lib/Transforms/Utils/CodeExtractor.cpp | 3
 llvm/lib/Transforms/Utils/Debugify.cpp | 47
 llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp | 2
 llvm/lib/Transforms/Utils/LoopUtils.cpp | 65
 llvm/lib/Transforms/Utils/LowerAtomic.cpp | 4
 llvm/lib/Transforms/Utils/MisExpect.cpp | 4
 llvm/lib/Transforms/Utils/ModuleUtils.cpp | 10
 llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 30
 llvm/lib/Transforms/Utils/SCCPSolver.cpp | 43
 llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 3
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 205
 llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 74
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 189
 llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 30
 llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 51
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 733
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 18
 llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h | 3
 llvm/lib/Transforms/Vectorize/VPlan.cpp | 73
 llvm/lib/Transforms/Vectorize/VPlan.h | 62
 llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 365
 llvm/lib/Transforms/Vectorize/VPlanValue.h | 2
 llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 69
 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 174
 63 files changed, 1923 insertions(+), 1418 deletions(-)
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index d09607bb1c4c..51eb8ebf0369 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -881,16 +881,16 @@ static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
dwarf::DW_ATE_float,
llvm::DINode::FlagArtificial);
} else if (Ty->isPointerTy()) {
- // Construct BasicType instead of PointerType to avoid infinite
- // search problem.
- // For example, we would be in trouble if we traverse recursively:
+ // Construct a PointerType to null (aka void *) instead of exploring the
+ // pointee type, to avoid an infinite search. For example, we would be
+ // in trouble if we traversed recursively:
//
// struct Node {
// Node* ptr;
// };
- RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
- dwarf::DW_ATE_address,
- llvm::DINode::FlagArtificial);
+ RetType = Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
+ Layout.getABITypeAlignment(Ty),
+ /*DWARFAddressSpace=*/None, Name);
} else if (Ty->isStructTy()) {
auto *DIStruct = Builder.createStructType(
Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
@@ -914,13 +914,21 @@ static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
RetType = DIStruct;
} else {
- LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n";);
- SmallString<32> Buffer;
- raw_svector_ostream OS(Buffer);
- OS << Name.str() << "_" << Layout.getTypeSizeInBits(Ty);
- RetType = Builder.createBasicType(OS.str(), Layout.getTypeSizeInBits(Ty),
- dwarf::DW_ATE_address,
- llvm::DINode::FlagArtificial);
+ LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
+ TypeSize Size = Layout.getTypeSizeInBits(Ty);
+ auto *CharSizeType = Builder.createBasicType(
+ Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);
+
+ if (Size <= 8)
+ RetType = CharSizeType;
+ else {
+ if (Size % 8 != 0)
+ Size = TypeSize::Fixed(Size + 8 - (Size % 8));
+
+ RetType = Builder.createArrayType(
+ Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
+ Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
+ }
}
DITypeCache.insert({Ty, RetType});
@@ -971,7 +979,8 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
unsigned LineNum = PromiseDIVariable->getLine();
DICompositeType *FrameDITy = DBuilder.createStructType(
- DIS, "__coro_frame_ty", DFile, LineNum, Shape.FrameSize * 8,
+ DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
+ DFile, LineNum, Shape.FrameSize * 8,
Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
llvm::DINodeArray());
StructType *FrameTy = Shape.FrameTy;
@@ -995,14 +1004,12 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
*IndexTy = FrameTy->getElementType(IndexIndex);
DenseMap<unsigned, DIType *> TyCache;
- TyCache.insert({ResumeIndex,
- DBuilder.createBasicType("__resume_fn",
- Layout.getTypeSizeInBits(ResumeFnTy),
- dwarf::DW_ATE_address)});
TyCache.insert(
- {DestroyIndex, DBuilder.createBasicType(
- "__destroy_fn", Layout.getTypeSizeInBits(DestroyFnTy),
- dwarf::DW_ATE_address)});
+ {ResumeIndex, DBuilder.createPointerType(
+ nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
+ TyCache.insert(
+ {DestroyIndex, DBuilder.createPointerType(
+ nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});
/// FIXME: If we fill the field `SizeInBits` with the actual size of
/// __coro_index in bits, then __coro_index wouldn't show in the debugger.
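The fallback above models an unresolved frame type as an array of unsigned char; a minimal standalone restatement of its byte rounding (hand-written, not part of the commit):

#include <cassert>
#include <cstdint>
// Mirrors the hunk: sizes over 8 bits are rounded up to whole bytes and
// emitted as an array of 8-bit elements.
static uint64_t charArrayElements(uint64_t SizeInBits) {
  if (SizeInBits % 8 != 0)
    SizeInBits += 8 - (SizeInBits % 8);
  return SizeInBits / 8;
}
int main() {
  assert(charArrayElements(12) == 2); // 12 bits -> 16 bits -> 2 elements
  assert(charArrayElements(64) == 8);
}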
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index ead552d9be4e..9c1b247cdb39 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -389,7 +389,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
// Replace CoroSave with a store to Index:
// %index.addr = getelementptr %f.frame... (index field number)
- // store i32 0, i32* %index.addr1
+ // store i32 %IndexVal, i32* %index.addr1
auto *Save = S->getCoroSave();
Builder.SetInsertPoint(Save);
if (S->isFinal()) {
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index b05b7990e3f0..e5ff98e4f73f 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -718,8 +718,8 @@ Argument *IRPosition::getAssociatedArgument() const {
}
// If we found a unique callback candidate argument, return it.
- if (CBCandidateArg && CBCandidateArg.getValue())
- return CBCandidateArg.getValue();
+ if (CBCandidateArg && CBCandidateArg.value())
+ return CBCandidateArg.value();
// If no callbacks were found, or none used the underlying call site operand
// exclusively, use the direct callee argument if available.
@@ -1048,11 +1048,11 @@ Attributor::getAssumedConstant(const IRPosition &IRP,
recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
return llvm::None;
}
- if (isa_and_nonnull<UndefValue>(SimplifiedV.getValue())) {
+ if (isa_and_nonnull<UndefValue>(SimplifiedV.value())) {
recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
return UndefValue::get(IRP.getAssociatedType());
}
- Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.getValue());
+ Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.value());
if (CI)
CI = dyn_cast_or_null<Constant>(
AA::getWithType(*CI, *IRP.getAssociatedType()));
@@ -2697,8 +2697,8 @@ void InformationCache::initializeInformationCache(const Function &CF,
Optional<short> &NumUses = AssumeUsesMap[I];
if (!NumUses)
NumUses = I->getNumUses();
- NumUses = NumUses.getValue() - /* this assume */ 1;
- if (NumUses.getValue() != 0)
+ NumUses = NumUses.value() - /* this assume */ 1;
+ if (NumUses.value() != 0)
continue;
AssumeOnlyValues.insert(I);
for (const Value *Op : I->operands())
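Most hunks in this file (and many below) are the mechanical migration of llvm::Optional from getValue()/hasValue() to value()/has_value(), mirroring std::optional; a minimal illustration (hand-written, not from the commit):

#include "llvm/ADT/Optional.h"
int firstUseCount(llvm::Optional<short> NumUses) {
  if (NumUses.has_value())  // previously: NumUses.hasValue()
    return NumUses.value(); // previously: NumUses.getValue()
  return 0;
}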
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 4d99ce7e3175..1ff54b78e27e 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -437,7 +437,7 @@ static bool genericValueTraversal(
A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation);
if (!SimpleV)
continue;
- Value *NewV = SimpleV.getValue();
+ Value *NewV = SimpleV.value();
if (NewV && NewV != V) {
if ((VS & AA::Interprocedural) || !CtxI ||
AA::isValidInScope(*NewV, CtxI->getFunction())) {
@@ -1891,14 +1891,14 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
// Check if we have an assumed unique return value that we could manifest.
Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A);
- if (!UniqueRV || !UniqueRV.getValue())
+ if (!UniqueRV || !UniqueRV.value())
return Changed;
// Bookkeeping.
STATS_DECLTRACK(UniqueReturnValue, FunctionReturn,
"Number of function with unique return");
// If the assumed unique return value is an argument, annotate it.
- if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
+ if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.value())) {
if (UniqueRVArg->getType()->canLosslesslyBitCastTo(
getAssociatedFunction()->getReturnType())) {
getIRPosition() = IRPosition::argument(*UniqueRVArg);
@@ -2666,9 +2666,9 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
// Either we stopped and the appropriate action was taken,
// or we got back a simplified value to continue.
Optional<Value *> SimplifiedPtrOp = stopOnUndefOrAssumed(A, PtrOp, &I);
- if (!SimplifiedPtrOp || !SimplifiedPtrOp.getValue())
+ if (!SimplifiedPtrOp || !SimplifiedPtrOp.value())
return true;
- const Value *PtrOpVal = SimplifiedPtrOp.getValue();
+ const Value *PtrOpVal = SimplifiedPtrOp.value();
// A memory access through a pointer is considered UB
// only if the pointer has constant null value.
@@ -2757,14 +2757,14 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
IRPosition::value(*ArgVal), *this, UsedAssumedInformation);
if (UsedAssumedInformation)
continue;
- if (SimplifiedVal && !SimplifiedVal.getValue())
+ if (SimplifiedVal && !SimplifiedVal.value())
return true;
- if (!SimplifiedVal || isa<UndefValue>(*SimplifiedVal.getValue())) {
+ if (!SimplifiedVal || isa<UndefValue>(*SimplifiedVal.value())) {
KnownUBInsts.insert(&I);
continue;
}
if (!ArgVal->getType()->isPointerTy() ||
- !isa<ConstantPointerNull>(*SimplifiedVal.getValue()))
+ !isa<ConstantPointerNull>(*SimplifiedVal.value()))
continue;
auto &NonNullAA =
A.getAAFor<AANonNull>(*this, CalleeArgumentIRP, DepClassTy::NONE);
@@ -4101,11 +4101,11 @@ identifyAliveSuccessors(Attributor &A, const SwitchInst &SI,
bool UsedAssumedInformation = false;
Optional<Constant *> C =
A.getAssumedConstant(*SI.getCondition(), AA, UsedAssumedInformation);
- if (!C || isa_and_nonnull<UndefValue>(C.getValue())) {
+ if (!C || isa_and_nonnull<UndefValue>(C.value())) {
// No value yet, assume all edges are dead.
- } else if (isa_and_nonnull<ConstantInt>(C.getValue())) {
+ } else if (isa_and_nonnull<ConstantInt>(C.value())) {
for (auto &CaseIt : SI.cases()) {
- if (CaseIt.getCaseValue() == C.getValue()) {
+ if (CaseIt.getCaseValue() == C.value()) {
AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front());
return UsedAssumedInformation;
}
@@ -5523,11 +5523,10 @@ struct AAValueSimplifyImpl : AAValueSimplify {
if (!SimpleV)
return PoisonValue::get(&Ty);
Value *EffectiveV = &V;
- if (SimpleV.getValue())
- EffectiveV = SimpleV.getValue();
+ if (SimpleV.value())
+ EffectiveV = SimpleV.value();
if (auto *C = dyn_cast<Constant>(EffectiveV))
- if (!C->canTrap())
- return C;
+ return C;
if (CtxI && AA::isValidAtPosition(AA::ValueAndContext(*EffectiveV, *CtxI),
A.getInfoCache()))
return ensureType(A, *EffectiveV, Ty, CtxI, Check);
@@ -5541,7 +5540,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
/// nullptr if we don't have one that makes sense.
Value *manifestReplacementValue(Attributor &A, Instruction *CtxI) const {
Value *NewV = SimplifiedAssociatedValue
- ? SimplifiedAssociatedValue.getValue()
+ ? SimplifiedAssociatedValue.value()
: UndefValue::get(getAssociatedType());
if (NewV && NewV != &getAssociatedValue()) {
ValueToValueMapTy VMap;
@@ -5672,7 +5671,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
A.getAssumedConstant(ACSArgPos, *this, UsedAssumedInformation);
if (!SimpleArgOp)
return true;
- if (!SimpleArgOp.getValue())
+ if (!SimpleArgOp.value())
return false;
if (!AA::isDynamicallyUnique(A, *this, **SimpleArgOp))
return false;
@@ -5787,7 +5786,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return true;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return false;
LHS = *SimplifiedLHS;
@@ -5796,7 +5795,7 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return true;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return false;
RHS = *SimplifiedRHS;
@@ -5868,8 +5867,8 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
if (!SimplifiedOp)
return true;
- if (SimplifiedOp.getValue())
- NewOps[Idx] = SimplifiedOp.getValue();
+ if (SimplifiedOp.value())
+ NewOps[Idx] = SimplifiedOp.value();
else
NewOps[Idx] = Op;
@@ -6112,6 +6111,10 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
/// but which is not in the deallocation infos.
bool HasPotentiallyFreeingUnknownUses = false;
+ /// Flag to indicate that we should place the new alloca in the function
+ /// entry block rather than where the call site (CB) is.
+ bool MoveAllocaIntoEntry = true;
+
/// The set of free calls that use this allocation.
SmallSetVector<CallBase *, 1> PotentialFreeCalls{};
};
@@ -6242,17 +6245,6 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
- LoopInfo *LI =
- A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F);
- Optional<bool> MayContainIrreducibleControl;
- auto IsInLoop = [&](BasicBlock &BB) {
- if (!MayContainIrreducibleControl.has_value())
- MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI);
- if (MayContainIrreducibleControl.value())
- return true;
- return LI->getLoopFor(&BB) != nullptr;
- };
-
for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
@@ -6294,25 +6286,25 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
Size = SizeOffsetPair.first;
}
- Instruction *IP = (!SizeAPI.has_value() || IsInLoop(*AI.CB->getParent()))
- ? AI.CB
- : &F->getEntryBlock().front();
+ Instruction *IP =
+ AI.MoveAllocaIntoEntry ? &F->getEntryBlock().front() : AI.CB;
Align Alignment(1);
if (MaybeAlign RetAlign = AI.CB->getRetAlign())
Alignment = std::max(Alignment, *RetAlign);
if (Value *Align = getAllocAlignment(AI.CB, TLI)) {
Optional<APInt> AlignmentAPI = getAPInt(A, *this, *Align);
- assert(AlignmentAPI && AlignmentAPI.getValue().getZExtValue() > 0 &&
+ assert(AlignmentAPI && AlignmentAPI.value().getZExtValue() > 0 &&
"Expected an alignment during manifest!");
Alignment = std::max(
- Alignment, assumeAligned(AlignmentAPI.getValue().getZExtValue()));
+ Alignment, assumeAligned(AlignmentAPI.value().getZExtValue()));
}
// TODO: Hoist the alloca towards the function entry.
unsigned AS = DL.getAllocaAddrSpace();
- Instruction *Alloca = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
- Size, Alignment, "", IP);
+ Instruction *Alloca =
+ new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment,
+ AI.CB->getName() + ".h2s", IP);
if (Alloca->getType() != AI.CB->getType())
Alloca = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
@@ -6354,7 +6346,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
A.getAssumedConstant(V, AA, UsedAssumedInformation);
if (!SimpleV)
return APInt(64, 0);
- if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue()))
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.value()))
return CI->getValue();
return llvm::None;
}
@@ -6400,6 +6392,21 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
bool StackIsAccessibleByOtherThreads =
A.getInfoCache().stackIsAccessibleByOtherThreads();
+ LoopInfo *LI =
+ A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(*F);
+ Optional<bool> MayContainIrreducibleControl;
+ auto IsInLoop = [&](BasicBlock &BB) {
+ if (&F->getEntryBlock() == &BB)
+ return false;
+ if (!MayContainIrreducibleControl.has_value())
+ MayContainIrreducibleControl = mayContainIrreducibleControl(*F, LI);
+ if (MayContainIrreducibleControl.value())
+ return true;
+ if (!LI)
+ return true;
+ return LI->getLoopFor(&BB) != nullptr;
+ };
+
// Flag to ensure we update our deallocation information at most once per
// updateImpl call and only if we use the free check reasoning.
bool HasUpdatedFrees = false;
@@ -6617,21 +6624,20 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
continue;
- } else {
- if (APAlign->ugt(llvm::Value::MaximumAlignment) ||
- !APAlign->isPowerOf2()) {
- LLVM_DEBUG(dbgs() << "[H2S] Invalid allocation alignment: " << APAlign
- << "\n");
- AI.Status = AllocationInfo::INVALID;
- Changed = ChangeStatus::CHANGED;
- continue;
- }
+ }
+ if (APAlign->ugt(llvm::Value::MaximumAlignment) ||
+ !APAlign->isPowerOf2()) {
+ LLVM_DEBUG(dbgs() << "[H2S] Invalid allocation alignment: " << APAlign
+ << "\n");
+ AI.Status = AllocationInfo::INVALID;
+ Changed = ChangeStatus::CHANGED;
+ continue;
}
}
+ Optional<APInt> Size = getSize(A, *this, AI);
if (MaxHeapToStackSize != -1) {
- Optional<APInt> Size = getSize(A, *this, AI);
- if (!Size || Size.getValue().ugt(MaxHeapToStackSize)) {
+ if (!Size || Size.value().ugt(MaxHeapToStackSize)) {
LLVM_DEBUG({
if (!Size)
dbgs() << "[H2S] Unknown allocation size: " << *AI.CB << "\n";
@@ -6649,18 +6655,23 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
switch (AI.Status) {
case AllocationInfo::STACK_DUE_TO_USE:
if (UsesCheck(AI))
- continue;
+ break;
AI.Status = AllocationInfo::STACK_DUE_TO_FREE;
LLVM_FALLTHROUGH;
case AllocationInfo::STACK_DUE_TO_FREE:
if (FreeCheck(AI))
- continue;
+ break;
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
- continue;
+ break;
case AllocationInfo::INVALID:
llvm_unreachable("Invalid allocations should never reach this point!");
};
+
+ // Check if we still think we can move it into the entry block.
+ if (AI.MoveAllocaIntoEntry &&
+ (!Size.has_value() || IsInLoop(*AI.CB->getParent())))
+ AI.MoveAllocaIntoEntry = false;
}
return Changed;
@@ -6748,8 +6759,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
LLVM_DEBUG({
dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
- if (CSTy && CSTy.getValue())
- CSTy.getValue()->print(dbgs());
+ if (CSTy && CSTy.value())
+ CSTy.value()->print(dbgs());
else if (CSTy)
dbgs() << "<nullptr>";
else
@@ -6760,8 +6771,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
LLVM_DEBUG({
dbgs() << " : New Type: ";
- if (Ty && Ty.getValue())
- Ty.getValue()->print(dbgs());
+ if (Ty && Ty.value())
+ Ty.value()->print(dbgs());
else if (Ty)
dbgs() << "<nullptr>";
else
@@ -6769,7 +6780,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
dbgs() << "\n";
});
- return !Ty || Ty.getValue();
+ return !Ty || Ty.value();
};
if (!A.checkForAllCallSites(CallSiteCheck, *this, true,
@@ -6783,7 +6794,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
PrivatizableType = identifyPrivatizableType(A);
if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
- if (!PrivatizableType.getValue())
+ if (!PrivatizableType.value())
return indicatePessimisticFixpoint();
// The dependence is optional so we don't give up once we give up on the
@@ -6871,7 +6882,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType();
if (!CBArgPrivTy)
continue;
- if (CBArgPrivTy.getValue() == PrivatizableType)
+ if (CBArgPrivTy.value() == PrivatizableType)
continue;
}
@@ -6918,7 +6929,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType();
if (!DCArgPrivTy)
return true;
- if (DCArgPrivTy.getValue() == PrivatizableType)
+ if (DCArgPrivTy.value() == PrivatizableType)
return true;
}
}
@@ -7060,7 +7071,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
ChangeStatus manifest(Attributor &A) override {
if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
- assert(PrivatizableType.getValue() && "Expected privatizable type!");
+ assert(PrivatizableType.value() && "Expected privatizable type!");
// Collect all tail calls in the function as we cannot allow new allocas to
// escape into tail recursion.
@@ -7093,9 +7104,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Instruction *IP = &*EntryBB.getFirstInsertionPt();
const DataLayout &DL = IP->getModule()->getDataLayout();
unsigned AS = DL.getAllocaAddrSpace();
- Instruction *AI = new AllocaInst(PrivatizableType.getValue(), AS,
+ Instruction *AI = new AllocaInst(PrivatizableType.value(), AS,
Arg->getName() + ".priv", IP);
- createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
+ createInitialization(PrivatizableType.value(), *AI, ReplacementFn,
ArgIt->getArgNo(), *IP);
if (AI->getType() != Arg->getType())
@@ -7203,7 +7214,7 @@ struct AAPrivatizablePtrCallSiteArgument final
PrivatizableType = identifyPrivatizableType(A);
if (!PrivatizableType)
return ChangeStatus::UNCHANGED;
- if (!PrivatizableType.getValue())
+ if (!PrivatizableType.value())
return indicatePessimisticFixpoint();
const IRPosition &IRP = getIRPosition();
@@ -8664,7 +8675,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return true;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return false;
LHS = *SimplifiedLHS;
@@ -8673,7 +8684,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return true;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return false;
RHS = *SimplifiedRHS;
@@ -8717,7 +8728,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedOpV)
return true;
- if (!SimplifiedOpV.getValue())
+ if (!SimplifiedOpV.value())
return false;
OpV = *SimplifiedOpV;
@@ -8747,7 +8758,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return true;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return false;
LHS = *SimplifiedLHS;
@@ -8756,7 +8767,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return true;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return false;
RHS = *SimplifiedRHS;
@@ -8821,7 +8832,7 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
*this, UsedAssumedInformation);
if (!SimplifiedOpV)
return true;
- if (!SimplifiedOpV.getValue())
+ if (!SimplifiedOpV.value())
return false;
Value *VPtr = *SimplifiedOpV;
@@ -9182,7 +9193,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return indicatePessimisticFixpoint();
LHS = *SimplifiedLHS;
@@ -9191,7 +9202,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return indicatePessimisticFixpoint();
RHS = *SimplifiedRHS;
@@ -9265,7 +9276,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return indicatePessimisticFixpoint();
LHS = *SimplifiedLHS;
@@ -9274,7 +9285,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return indicatePessimisticFixpoint();
RHS = *SimplifiedRHS;
@@ -9340,7 +9351,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedSrc)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedSrc.getValue())
+ if (!SimplifiedSrc.value())
return indicatePessimisticFixpoint();
Src = *SimplifiedSrc;
@@ -9373,7 +9384,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedLHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedLHS.getValue())
+ if (!SimplifiedLHS.value())
return indicatePessimisticFixpoint();
LHS = *SimplifiedLHS;
@@ -9382,7 +9393,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
*this, UsedAssumedInformation);
if (!SimplifiedRHS)
return ChangeStatus::UNCHANGED;
- if (!SimplifiedRHS.getValue())
+ if (!SimplifiedRHS.value())
return indicatePessimisticFixpoint();
RHS = *SimplifiedRHS;
@@ -9441,7 +9452,7 @@ struct AAPotentialConstantValuesFloating : AAPotentialConstantValuesImpl {
UsedAssumedInformation);
if (!SimplifiedIncomingValue)
continue;
- if (!SimplifiedIncomingValue.getValue())
+ if (!SimplifiedIncomingValue.value())
return indicatePessimisticFixpoint();
IncomingValue = *SimplifiedIncomingValue;
@@ -9930,7 +9941,7 @@ private:
const Function &Fn) {
Optional<bool> Cached = isCachedReachable(Fn);
if (Cached)
- return Cached.getValue();
+ return Cached.value();
// The query was not cached, thus it is new. We need to request an update
// explicitly to make sure the information is properly run to a
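Beyond the rename churn, the heap-to-stack hunks above move the loop test from manifest() into updateImpl() and cache the verdict in the new MoveAllocaIntoEntry flag; a condensed restatement of the conditions (hand-written booleans, not the Attributor API):

// An alloca is hoisted to the entry block only when its size is known and
// its call site is provably outside any loop.
bool keepMoveIntoEntry(bool SizeKnown, bool CallInEntryBlock,
                       bool MayBeIrreducible, bool HasLoopInfo,
                       bool CallInLoop) {
  if (!SizeKnown)
    return false;      // unknown-size allocas stay at the call site
  if (CallInEntryBlock)
    return true;       // the entry block is never inside a loop
  if (MayBeIrreducible || !HasLoopInfo)
    return false;      // conservatively treated as "in a loop"
  return !CallInLoop;
}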
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 1a1bde4f0668..1ad6e2b2a1d2 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1584,11 +1584,6 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
}
Value *StoredOnceValue = GS.getStoredOnceValue();
if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
- // Avoid speculating constant expressions that might trap (div/rem).
- auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
- if (SOVConstant && SOVConstant->canTrap())
- return Changed;
-
Function &StoreFn =
const_cast<Function &>(*GS.StoredOnceStore->getFunction());
bool CanHaveNonUndefGlobalInitializer =
@@ -1601,6 +1596,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
// This is restricted to address spaces that allow globals to have
// initializers. NVPTX, for example, does not support initializers for
// shared memory (AS 3).
+ auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
if (SOVConstant && isa<UndefValue>(GV->getInitializer()) &&
DL.getTypeAllocSize(SOVConstant->getType()) ==
DL.getTypeAllocSize(GV->getValueType()) &&
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index d75d99e307fd..28bc43aa1633 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -555,7 +555,7 @@ collectRegionsConstants(OutlinableRegion &Region,
for (Value *V : ID.OperVals) {
Optional<unsigned> GVNOpt = C.getGVN(V);
assert(GVNOpt && "Expected a GVN for operand?");
- unsigned GVN = GVNOpt.getValue();
+ unsigned GVN = GVNOpt.value();
// Check if this global value has been found to not be the same already.
if (NotSame.contains(GVN)) {
@@ -570,7 +570,7 @@ collectRegionsConstants(OutlinableRegion &Region,
// it is considered to not be the same value.
Optional<bool> ConstantMatches = constantMatches(V, GVN, GVNToConstant);
if (ConstantMatches) {
- if (ConstantMatches.getValue())
+ if (ConstantMatches.value())
continue;
else
ConstantsTheSame = false;
@@ -651,7 +651,7 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
// Transfer the swifterr attribute to the correct function parameter.
if (Group.SwiftErrorArgument)
- Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.getValue(),
+ Group.OutlinedFunction->addParamAttr(Group.SwiftErrorArgument.value(),
Attribute::SwiftError);
Group.OutlinedFunction->addFnAttr(Attribute::OptimizeForSize);
@@ -809,7 +809,7 @@ static void mapInputsToGVNs(IRSimilarityCandidate &C,
if (OutputMappings.find(Input) != OutputMappings.end())
Input = OutputMappings.find(Input)->second;
assert(C.getGVN(Input) && "Could not find a numbering for the given input");
- EndInputNumbers.push_back(C.getGVN(Input).getValue());
+ EndInputNumbers.push_back(C.getGVN(Input).value());
}
}
@@ -948,11 +948,11 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
for (unsigned InputVal : InputGVNs) {
Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
assert(CanonicalNumberOpt && "Canonical number not found?");
- unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
+ unsigned CanonicalNumber = CanonicalNumberOpt.value();
Optional<Value *> InputOpt = C.fromGVN(InputVal);
assert(InputOpt && "Global value number not found?");
- Value *Input = InputOpt.getValue();
+ Value *Input = InputOpt.value();
DenseMap<unsigned, unsigned>::iterator AggArgIt =
Group.CanonicalNumberToAggArg.find(CanonicalNumber);
@@ -1236,13 +1236,13 @@ static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
Optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
assert(BBGVN && "Could not find GVN for the incoming block!");
- BBGVN = Cand.getCanonicalNum(BBGVN.getValue());
+ BBGVN = Cand.getCanonicalNum(BBGVN.value());
assert(BBGVN && "Could not find canonical number for the incoming block!");
// Create a pair of the exit block canonical value, and the aggregate
// argument location, connected to the canonical numbers stored in the
// PHINode.
PHINodeData TemporaryPair =
- std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs);
+ std::make_pair(std::make_pair(BBGVN.value(), AggArgIdx), PHIGVNs);
hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
// Look for and create a new entry in our connection between canonical
@@ -1516,8 +1516,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
// Make sure that the argument in the new function has the SwiftError
// argument.
if (Group.SwiftErrorArgument)
- Call->addParamAttr(Group.SwiftErrorArgument.getValue(),
- Attribute::SwiftError);
+ Call->addParamAttr(Group.SwiftErrorArgument.value(), Attribute::SwiftError);
return Call;
}
@@ -2082,9 +2081,9 @@ static void alignOutputBlockWithAggFunc(
if (MatchingBB) {
LLVM_DEBUG(dbgs() << "Set output block for region in function"
<< Region.ExtractedFunction << " to "
- << MatchingBB.getValue());
+ << MatchingBB.value());
- Region.OutputBlockNum = MatchingBB.getValue();
+ Region.OutputBlockNum = MatchingBB.value();
for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
VtoBB.second->eraseFromParent();
return;
@@ -2679,15 +2678,14 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region,
if (!OutputIdx)
return;
- if (OutputMappings.find(Outputs[OutputIdx.getValue()]) ==
- OutputMappings.end()) {
+ if (OutputMappings.find(Outputs[OutputIdx.value()]) == OutputMappings.end()) {
LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
- << *Outputs[OutputIdx.getValue()] << "\n");
- OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.getValue()]));
+ << *Outputs[OutputIdx.value()] << "\n");
+ OutputMappings.insert(std::make_pair(LI, Outputs[OutputIdx.value()]));
} else {
- Value *Orig = OutputMappings.find(Outputs[OutputIdx.getValue()])->second;
+ Value *Orig = OutputMappings.find(Outputs[OutputIdx.value()])->second;
LLVM_DEBUG(dbgs() << "Mapping extracted output " << *Orig << " to "
- << *Outputs[OutputIdx.getValue()] << "\n");
+ << *Outputs[OutputIdx.value()] << "\n");
OutputMappings.insert(std::make_pair(LI, Orig));
}
}
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 227ad8501f25..8e0ca8c6c997 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -3340,6 +3340,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
}
bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
+ if (!mayContainParallelRegion())
+ return false;
+
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
if (!SPMDCompatibilityTracker.isAssumed()) {
@@ -4428,10 +4431,10 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
if (!SimplifiedValue)
return Str + std::string("none");
- if (!SimplifiedValue.getValue())
+ if (!SimplifiedValue.value())
return Str + std::string("nullptr");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.getValue()))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.value()))
return Str + std::to_string(CI->getSExtValue());
return Str + std::string("unknown");
@@ -4456,7 +4459,7 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
assert((isValidState() ||
- (SimplifiedValue && SimplifiedValue.getValue() == nullptr)) &&
+ (SimplifiedValue && SimplifiedValue.value() == nullptr)) &&
"Unexpected invalid state!");
if (!isAtFixpoint()) {
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index ae787be40c55..8eef82675e86 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -898,183 +898,6 @@ void PassManagerBuilder::populateModulePassManager(
MPM.add(createAnnotationRemarksLegacyPass());
}
-void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
- // Load sample profile before running the LTO optimization pipeline.
- if (!PGOSampleUse.empty()) {
- PM.add(createPruneEHPass());
- PM.add(createSampleProfileLoaderPass(PGOSampleUse));
- }
-
- // Remove unused virtual tables to improve the quality of code generated by
- // whole-program devirtualization and bitset lowering.
- PM.add(createGlobalDCEPass());
-
- // Provide AliasAnalysis services for optimizations.
- addInitialAliasAnalysisPasses(PM);
-
- // Allow forcing function attributes as a debugging and tuning aid.
- PM.add(createForceFunctionAttrsLegacyPass());
-
- // Infer attributes about declarations if possible.
- PM.add(createInferFunctionAttrsLegacyPass());
-
- if (OptLevel > 1) {
- // Split call-site with more constrained arguments.
- PM.add(createCallSiteSplittingPass());
-
- // Propagate constant function arguments by specializing the functions.
- if (EnableFunctionSpecialization && OptLevel > 2)
- PM.add(createFunctionSpecializationPass());
-
- // Propagate constants at call sites into the functions they call. This
- // opens opportunities for globalopt (and inlining) by substituting function
- // pointers passed as arguments to direct uses of functions.
- PM.add(createIPSCCPPass());
-
- // Attach metadata to indirect call sites indicating the set of functions
- // they may target at run-time. This should follow IPSCCP.
- PM.add(createCalledValuePropagationPass());
-
- // Infer attributes on declarations, call sites, arguments, etc.
- if (AttributorRun & AttributorRunOption::MODULE)
- PM.add(createAttributorLegacyPass());
- }
-
- // Infer attributes about definitions. The readnone attribute in particular is
- // required for virtual constant propagation.
- PM.add(createPostOrderFunctionAttrsLegacyPass());
- PM.add(createReversePostOrderFunctionAttrsPass());
-
- // Split globals using inrange annotations on GEP indices. This can help
- // improve the quality of generated code when virtual constant propagation or
- // control flow integrity are enabled.
- PM.add(createGlobalSplitPass());
-
- // Apply whole-program devirtualization and virtual constant propagation.
- PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr));
-
- // That's all we need at opt level 1.
- if (OptLevel == 1)
- return;
-
- // Now that we internalized some globals, see if we can hack on them!
- PM.add(createGlobalOptimizerPass());
- // Promote any localized global vars.
- PM.add(createPromoteMemoryToRegisterPass());
-
- // Linking modules together can lead to duplicated global constants, only
- // keep one copy of each constant.
- PM.add(createConstantMergePass());
-
- // Remove unused arguments from functions.
- PM.add(createDeadArgEliminationPass());
-
- // Reduce the code after globalopt and ipsccp. Both can open up significant
- // simplification opportunities, and both can propagate functions through
- // function pointers. When this happens, we often have to resolve varargs
- // calls, etc, so let instcombine do this.
- if (OptLevel > 2)
- PM.add(createAggressiveInstCombinerPass());
- PM.add(createInstructionCombiningPass());
- addExtensionsToPM(EP_Peephole, PM);
-
- // Inline small functions
- bool RunInliner = Inliner;
- if (RunInliner) {
- PM.add(Inliner);
- Inliner = nullptr;
- }
-
- PM.add(createPruneEHPass()); // Remove dead EH info.
-
- // Infer attributes on declarations, call sites, arguments, etc. for an SCC.
- if (AttributorRun & AttributorRunOption::CGSCC)
- PM.add(createAttributorCGSCCLegacyPass());
-
- // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
- // there are no OpenMP runtime calls present in the module.
- if (OptLevel > 1)
- PM.add(createOpenMPOptCGSCCLegacyPass());
-
- // Optimize globals again if we ran the inliner.
- if (RunInliner)
- PM.add(createGlobalOptimizerPass());
- PM.add(createGlobalDCEPass()); // Remove dead functions.
-
- // The IPO passes may leave cruft around. Clean up after them.
- PM.add(createInstructionCombiningPass());
- addExtensionsToPM(EP_Peephole, PM);
- PM.add(createJumpThreadingPass());
-
- // Break up allocas
- PM.add(createSROAPass());
-
- // LTO provides additional opportunities for tailcall elimination due to
- // link-time inlining, and visibility of nocapture attribute.
- if (OptLevel > 1)
- PM.add(createTailCallEliminationPass());
-
- // Infer attributes on declarations, call sites, arguments, etc.
- PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture.
- // Run a few AA driven optimizations here and now, to cleanup the code.
- PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
-
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true));
- PM.add(NewGVN ? createNewGVNPass()
- : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
- PM.add(createMemCpyOptPass()); // Remove dead memcpys.
-
- // Nuke dead stores.
- PM.add(createDeadStoreEliminationPass());
- PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
-
- // More loops are countable; try to optimize them.
- if (EnableLoopFlatten)
- PM.add(createLoopFlattenPass());
- PM.add(createIndVarSimplifyPass());
- PM.add(createLoopDeletionPass());
- if (EnableLoopInterchange)
- PM.add(createLoopInterchangePass());
-
- if (EnableConstraintElimination)
- PM.add(createConstraintEliminationPass());
-
- // Unroll small loops and perform peeling.
- PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
- ForgetAllSCEVInLoopUnroll));
- PM.add(createLoopDistributePass());
-
- addVectorPasses(PM, /* IsFullLTO */ true);
-
- addExtensionsToPM(EP_Peephole, PM);
-
- PM.add(createJumpThreadingPass());
-}
-
-void PassManagerBuilder::addLateLTOOptimizationPasses(
- legacy::PassManagerBase &PM) {
- // See comment in the new PM for justification of scheduling splitting at
- // this stage (\ref buildLTODefaultPipeline).
- if (EnableHotColdSplit)
- PM.add(createHotColdSplittingPass());
-
- // Delete basic blocks, which optimization passes may have killed.
- PM.add(
- createCFGSimplificationPass(SimplifyCFGOptions().hoistCommonInsts(true)));
-
- // Drop bodies of available externally objects to improve GlobalDCE.
- PM.add(createEliminateAvailableExternallyPass());
-
- // Now that we have optimized the program, discard unreachable functions.
- PM.add(createGlobalDCEPass());
-
- // FIXME: this is profitable (for compiler time) to do at -O0 too, but
- // currently it damages debug info.
- if (MergeFunctions)
- PM.add(createMergeFunctionsPass());
-}
-
LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
PassManagerBuilder *PMB = new PassManagerBuilder();
return wrap(PMB);
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 6859953de962..764fd57d245f 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -130,7 +130,7 @@ void ContextTrieNode::addFunctionSize(uint32_t FSize) {
if (!FuncSize)
FuncSize = 0;
- FuncSize = FuncSize.getValue() + FSize;
+ FuncSize = FuncSize.value() + FSize;
}
LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; }
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 40de69bbf2cf..55fee213cd5f 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1350,14 +1350,14 @@ SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
- return Cost ? !!Cost.getValue() : false;
+ return Cost ? !!Cost.value() : false;
}
InlineCost
SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
if (Optional<InlineCost> ReplayCost =
getExternalInlineAdvisorCost(*Candidate.CallInstr))
- return ReplayCost.getValue();
+ return ReplayCost.value();
// Adjust threshold based on call site hotness, only do this for callsite
// prioritized inliner because otherwise cost-benefit check is done earlier.
int SampleThreshold = SampleColdCallSiteThreshold;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index f4d8b79a5311..535a7736454c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1660,8 +1660,9 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
Constant *MulC;
if (match(&I, m_c_FAdd(m_FMul(m_Value(X), m_ImmConstant(MulC)),
m_Deferred(X)))) {
- MulC = ConstantExpr::getFAdd(MulC, ConstantFP::get(I.getType(), 1.0));
- return BinaryOperator::CreateFMulFMF(X, MulC, &I);
+ if (Constant *NewMulC = ConstantFoldBinaryOpOperands(
+ Instruction::FAdd, MulC, ConstantFP::get(I.getType(), 1.0), DL))
+ return BinaryOperator::CreateFMulFMF(X, NewMulC, &I);
}
if (Value *V = FAddCombine(Builder).simplify(&I))
@@ -1750,6 +1751,52 @@ Value *InstCombinerImpl::OptimizePointerDifference(Value *LHS, Value *RHS,
return Builder.CreateIntCast(Result, Ty, true);
}
+static Instruction *foldSubOfMinMax(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ Type *Ty = I.getType();
+ auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op1);
+ if (!MinMax)
+ return nullptr;
+
+ // sub(add(X,Y), s/umin(X,Y)) --> s/umax(X,Y)
+ // sub(add(X,Y), s/umax(X,Y)) --> s/umin(X,Y)
+ Value *X = MinMax->getLHS();
+ Value *Y = MinMax->getRHS();
+ if (match(Op0, m_c_Add(m_Specific(X), m_Specific(Y))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMax->getIntrinsicID());
+ Function *F = Intrinsic::getDeclaration(I.getModule(), InvID, Ty);
+ return CallInst::Create(F, {X, Y});
+ }
+
+ // sub(add(X,Y),umin(Y,Z)) --> add(X,usub.sat(Y,Z))
+ // sub(add(X,Z),umin(Y,Z)) --> add(X,usub.sat(Z,Y))
+ Value *Z;
+ if (match(Op1, m_OneUse(m_UMin(m_Value(Y), m_Value(Z))))) {
+ if (match(Op0, m_OneUse(m_c_Add(m_Specific(Y), m_Value(X))))) {
+ Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, Ty, {Y, Z});
+ return BinaryOperator::CreateAdd(X, USub);
+ }
+ if (match(Op0, m_OneUse(m_c_Add(m_Specific(Z), m_Value(X))))) {
+ Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, Ty, {Z, Y});
+ return BinaryOperator::CreateAdd(X, USub);
+ }
+ }
+
+ // sub Op0, smin((sub nsw Op0, Z), 0) --> smax Op0, Z
+ // sub Op0, smax((sub nsw Op0, Z), 0) --> smin Op0, Z
+ if (MinMax->isSigned() && match(Y, m_ZeroInt()) &&
+ match(X, m_NSWSub(m_Specific(Op0), m_Value(Z)))) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(MinMax->getIntrinsicID());
+ Function *F = Intrinsic::getDeclaration(I.getModule(), InvID, Ty);
+ return CallInst::Create(F, {Op0, Z});
+ }
+
+ return nullptr;
+}
+
Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (Value *V = simplifySubInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
@@ -1919,14 +1966,12 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateAdd(X, ConstantExpr::getSub(C, C2));
}
+ // If there's no chance any bit will need to borrow from an adjacent bit:
+ // sub C, X --> xor X, C
const APInt *Op0C;
- if (match(Op0, m_APInt(Op0C)) && Op0C->isMask()) {
- // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
- // zero.
- KnownBits RHSKnown = computeKnownBits(Op1, 0, &I);
- if ((*Op0C | RHSKnown.Zero).isAllOnes())
- return BinaryOperator::CreateXor(Op1, Op0);
- }
+ if (match(Op0, m_APInt(Op0C)) &&
+ (~computeKnownBits(Op1, 0, &I).Zero).isSubsetOf(*Op0C))
+ return BinaryOperator::CreateXor(Op1, Op0);
{
Value *Y;
@@ -2016,36 +2061,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
}
}
- if (auto *II = dyn_cast<MinMaxIntrinsic>(Op1)) {
- {
- // sub(add(X,Y), s/umin(X,Y)) --> s/umax(X,Y)
- // sub(add(X,Y), s/umax(X,Y)) --> s/umin(X,Y)
- Value *X = II->getLHS();
- Value *Y = II->getRHS();
- if (match(Op0, m_c_Add(m_Specific(X), m_Specific(Y))) &&
- (Op0->hasOneUse() || Op1->hasOneUse())) {
- Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
- Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
- return replaceInstUsesWith(I, InvMaxMin);
- }
- }
-
- {
- // sub(add(X,Y),umin(Y,Z)) --> add(X,usub.sat(Y,Z))
- // sub(add(X,Z),umin(Y,Z)) --> add(X,usub.sat(Z,Y))
- Value *X, *Y, *Z;
- if (match(Op1, m_OneUse(m_UMin(m_Value(Y), m_Value(Z))))) {
- if (match(Op0, m_OneUse(m_c_Add(m_Specific(Y), m_Value(X)))))
- return BinaryOperator::CreateAdd(
- X, Builder.CreateIntrinsic(Intrinsic::usub_sat, I.getType(),
- {Y, Z}));
- if (match(Op0, m_OneUse(m_c_Add(m_Specific(Z), m_Value(X)))))
- return BinaryOperator::CreateAdd(
- X, Builder.CreateIntrinsic(Intrinsic::usub_sat, I.getType(),
- {Z, Y}));
- }
- }
- }
+ if (Instruction *R = foldSubOfMinMax(I, Builder))
+ return R;
{
// If we have a subtraction between some value and a select between
@@ -2437,13 +2454,15 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
// (X * C) - X --> X * (C - 1.0)
if (match(Op0, m_FMul(m_Specific(Op1), m_Constant(C)))) {
- Constant *CSubOne = ConstantExpr::getFSub(C, ConstantFP::get(Ty, 1.0));
- return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I);
+ if (Constant *CSubOne = ConstantFoldBinaryOpOperands(
+ Instruction::FSub, C, ConstantFP::get(Ty, 1.0), DL))
+ return BinaryOperator::CreateFMulFMF(Op1, CSubOne, &I);
}
// X - (X * C) --> X * (1.0 - C)
if (match(Op1, m_FMul(m_Specific(Op0), m_Constant(C)))) {
- Constant *OneSubC = ConstantExpr::getFSub(ConstantFP::get(Ty, 1.0), C);
- return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
+ if (Constant *OneSubC = ConstantFoldBinaryOpOperands(
+ Instruction::FSub, ConstantFP::get(Ty, 1.0), C, DL))
+ return BinaryOperator::CreateFMulFMF(Op0, OneSubC, &I);
}
// Reassociate fsub/fadd sequences to create more fadd instructions and
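The first pattern handled by the new foldSubOfMinMax above can be sanity-checked with plain unsigned arithmetic, since (x + y) - min(x, y) == max(x, y) holds even under wraparound; a hand-written check (not from the commit):

#include <algorithm>
#include <cassert>
#include <cstdint>
int main() {
  uint32_t Vals[] = {0u, 1u, 7u, 0xFFFFFFF0u, 0xFFFFFFFFu};
  for (uint32_t X : Vals)
    for (uint32_t Y : Vals) {
      // Wrapping add then subtract of the min/max recovers the other bound.
      assert(uint32_t(X + Y) - std::min(X, Y) == std::max(X, Y));
      assert(uint32_t(X + Y) - std::max(X, Y) == std::min(X, Y));
    }
}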
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index ae8865651ece..a8f2cd79830a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1771,6 +1771,16 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return new ZExtInst(IsZero, Ty);
}
+ // (-(X & 1)) & Y --> (X & 1) == 0 ? 0 : Y
+ Value *Neg;
+ if (match(&I,
+ m_c_And(m_CombineAnd(m_Value(Neg),
+ m_OneUse(m_Neg(m_And(m_Value(), m_One())))),
+ m_Value(Y)))) {
+ Value *Cmp = Builder.CreateIsNull(Neg);
+ return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
+ }
+
const APInt *C;
if (match(Op1, m_APInt(C))) {
const APInt *XorC;
@@ -1798,7 +1808,8 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
unsigned Width = Ty->getScalarSizeInBits();
const APInt *ShiftC;
- if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC)))))) {
+ if (match(Op0, m_OneUse(m_SExt(m_AShr(m_Value(X), m_APInt(ShiftC))))) &&
+ ShiftC->ult(Width)) {
if (*C == APInt::getLowBitsSet(Width, Width - ShiftC->getZExtValue())) {
// We are clearing high bits that were potentially set by sext+ashr:
// and (sext (ashr X, ShiftC)), C --> lshr (sext X), ShiftC
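The new fold at the top of this hunk relies on -(x & 1) being either all-zeros or all-ones, so ANDing it with y yields either 0 or y; an illustrative check (not part of the commit):

#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t X : {0u, 1u, 2u, 3u, 0xFFu})
    for (uint32_t Y : {0u, 1u, 0xDEADBEEFu}) {
      uint32_t Folded = ((X & 1) == 0) ? 0u : Y; // the select form produced
      assert(((0u - (X & 1)) & Y) == Folded);
    }
}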
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index 2540e545ae4d..0327efbf9614 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -61,7 +61,13 @@ bool isIdempotentRMW(AtomicRMWInst& RMWI) {
/// equivalent to its value operand.
bool isSaturating(AtomicRMWInst& RMWI) {
if (auto CF = dyn_cast<ConstantFP>(RMWI.getValOperand()))
- switch(RMWI.getOperation()) {
+ switch (RMWI.getOperation()) {
+ case AtomicRMWInst::FMax:
+ // maxnum(x, +inf) -> +inf
+ return !CF->isNegative() && CF->isInfinity();
+ case AtomicRMWInst::FMin:
+ // minnum(x, -inf) -> -inf
+ return CF->isNegative() && CF->isInfinity();
case AtomicRMWInst::FAdd:
case AtomicRMWInst::FSub:
return CF->isNaN();
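The two new saturating cases encode IEEE-754 maxNum/minNum semantics: an infinity of the matching sign absorbs every other operand, NaN included, so the RMW result no longer depends on the memory value. A hand-written check (not from the commit):

#include <cassert>
#include <cmath>
#include <limits>
int main() {
  const double Inf = std::numeric_limits<double>::infinity();
  const double NaN = std::nan("");
  assert(std::fmax(1.5, Inf) == Inf);   // maxnum(x, +inf) -> +inf
  assert(std::fmax(NaN, Inf) == Inf);   // the NaN operand is ignored
  assert(std::fmin(1.5, -Inf) == -Inf); // minnum(x, -inf) -> -inf
  assert(std::fmin(NaN, -Inf) == -Inf);
}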
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 67ef2e895b6c..edfdf70c2b97 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1543,7 +1543,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
!ShAmtC->containsConstantExpression()) {
// Canonicalize a shift amount constant operand to modulo the bit-width.
Constant *WidthC = ConstantInt::get(Ty, BitWidth);
- Constant *ModuloC = ConstantExpr::getURem(ShAmtC, WidthC);
+ Constant *ModuloC =
+ ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
+ if (!ModuloC)
+ return nullptr;
if (ModuloC != ShAmtC)
return replaceOperand(*II, 2, ModuloC);
@@ -2679,7 +2682,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Handle target specific intrinsics
Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
if (V)
- return V.getValue();
+ return V.value();
break;
}
}
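The canonicalization above reduces a constant shift amount modulo the bit width, now through the fallible constant folder; a hand-computed instance (not from the commit): on a 32-bit value, a shift amount of 37 folds to 37 urem 32 == 5.

#include <cassert>
int main() {
  unsigned BitWidth = 32, ShAmtC = 37;
  assert(ShAmtC % BitWidth == 5); // the replacement operand
}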
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index e9e779b8619b..a9a930555b3c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1756,11 +1756,12 @@ static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
// TODO:
// Try harder to find if the source integer type has less significant bits.
- // For example, compute number of sign bits or compute low bit mask.
+ // For example, compute number of sign bits.
KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
- int LowBits =
- (int)SrcTy->getScalarSizeInBits() - SrcKnown.countMinLeadingZeros();
- if (LowBits <= DestNumSigBits)
+ int SigBits = (int)SrcTy->getScalarSizeInBits() -
+ SrcKnown.countMinLeadingZeros() -
+ SrcKnown.countMinTrailingZeros();
+ if (SigBits <= DestNumSigBits)
return true;
return false;
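The tightened exactness check counts significant bits between the known leading and trailing zero bits rather than from bit zero; a hand-worked instance (not from the commit):

#include <cassert>
// E.g. a source value known to look like 0b00111000 has
// 8 - 2 (leading zeros) - 3 (trailing zeros) = 3 significant bits, so it
// converts exactly into any FP type with at least 3 significand bits.
int significantBits(int Width, int LeadZeros, int TrailZeros) {
  return Width - LeadZeros - TrailZeros;
}
int main() { assert(significantBits(8, 2, 3) == 3); }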
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index d1f89973caa1..9f6d36b85522 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1436,7 +1436,7 @@ Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
// icmp(phi(C1, C2, ...), C) -> phi(icmp(C1, C), icmp(C2, C), ...).
Constant *C = dyn_cast<Constant>(Op1);
- if (!C || C->canTrap())
+ if (!C)
return nullptr;
if (auto *Phi = dyn_cast<PHINode>(Op0))
@@ -1777,11 +1777,16 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
return new ICmpInst(NewPred, X, Zero);
}
+ APInt NewC2 = *C2;
+ KnownBits Know = computeKnownBits(And->getOperand(0), 0, And);
+ // Set high zeros of C2 to allow matching negated power-of-2.
+ NewC2 = *C2 + APInt::getHighBitsSet(C2->getBitWidth(),
+ Know.countMinLeadingZeros());
+
// Restrict this fold only for single-use 'and' (PR10267).
// ((%x & C) == 0) --> %x u< (-C) iff (-C) is power of two.
- if ((~(*C2) + 1).isPowerOf2()) {
- Constant *NegBOC =
- ConstantExpr::getNeg(cast<Constant>(And->getOperand(1)));
+ if (NewC2.isNegatedPowerOf2()) {
+ Constant *NegBOC = ConstantInt::get(And->getType(), -NewC2);
auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
return new ICmpInst(NewPred, X, NegBOC);
}
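A hand-worked instance of the widened fold above (numbers chosen for illustration, not from the commit): if the top 20 bits of %x are known zero, C2 = 0x0FF0 extends to NewC2 = 0xFFFFFFF0, a negated power of two, so ((%x & 0x0FF0) == 0) becomes (%x u< 16).

#include <cassert>
#include <cstdint>
int main() {
  // The upper 20 bits of x are zero by construction (x < 4096).
  for (uint32_t X = 0; X < 4096; ++X)
    assert(((X & 0x0FF0u) == 0) == (X < 16u));
}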
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2a34edbf6cb8..8cb09cbac86f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -505,20 +505,23 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
Constant *C1;
if (match(Op0, m_OneUse(m_FDiv(m_Constant(C1), m_Value(X))))) {
// (C1 / X) * C --> (C * C1) / X
- Constant *CC1 = ConstantExpr::getFMul(C, C1);
- if (CC1->isNormalFP())
+ Constant *CC1 =
+ ConstantFoldBinaryOpOperands(Instruction::FMul, C, C1, DL);
+ if (CC1 && CC1->isNormalFP())
return BinaryOperator::CreateFDivFMF(CC1, X, &I);
}
if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) {
// (X / C1) * C --> X * (C / C1)
- Constant *CDivC1 = ConstantExpr::getFDiv(C, C1);
- if (CDivC1->isNormalFP())
+ Constant *CDivC1 =
+ ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C1, DL);
+ if (CDivC1 && CDivC1->isNormalFP())
return BinaryOperator::CreateFMulFMF(X, CDivC1, &I);
// If the constant was a denormal, try reassociating differently.
// (X / C1) * C --> X / (C1 / C)
- Constant *C1DivC = ConstantExpr::getFDiv(C1, C);
- if (Op0->hasOneUse() && C1DivC->isNormalFP())
+ Constant *C1DivC =
+ ConstantFoldBinaryOpOperands(Instruction::FDiv, C1, C, DL);
+ if (C1DivC && Op0->hasOneUse() && C1DivC->isNormalFP())
return BinaryOperator::CreateFDivFMF(X, C1DivC, &I);
}
@@ -527,15 +530,19 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
// further folds and (X * C) + C2 is 'fma'.
if (match(Op0, m_OneUse(m_FAdd(m_Value(X), m_Constant(C1))))) {
// (X + C1) * C --> (X * C) + (C * C1)
- Constant *CC1 = ConstantExpr::getFMul(C, C1);
- Value *XC = Builder.CreateFMulFMF(X, C, &I);
- return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
+ if (Constant *CC1 = ConstantFoldBinaryOpOperands(
+ Instruction::FMul, C, C1, DL)) {
+ Value *XC = Builder.CreateFMulFMF(X, C, &I);
+ return BinaryOperator::CreateFAddFMF(XC, CC1, &I);
+ }
}
if (match(Op0, m_OneUse(m_FSub(m_Constant(C1), m_Value(X))))) {
// (C1 - X) * C --> (C * C1) - (X * C)
- Constant *CC1 = ConstantExpr::getFMul(C, C1);
- Value *XC = Builder.CreateFMulFMF(X, C, &I);
- return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+ if (Constant *CC1 = ConstantFoldBinaryOpOperands(
+ Instruction::FMul, C, C1, DL)) {
+ Value *XC = Builder.CreateFMulFMF(X, C, &I);
+ return BinaryOperator::CreateFSubFMF(CC1, XC, &I);
+ }
}
}
@@ -1232,8 +1239,10 @@ static Instruction *foldFDivConstantDivisor(BinaryOperator &I) {
// on all targets.
// TODO: Use Intrinsic::canonicalize or let function attributes tell us that
// denorms are flushed?
- auto *RecipC = ConstantExpr::getFDiv(ConstantFP::get(I.getType(), 1.0), C);
- if (!RecipC->isNormalFP())
+ const DataLayout &DL = I.getModule()->getDataLayout();
+ auto *RecipC = ConstantFoldBinaryOpOperands(
+ Instruction::FDiv, ConstantFP::get(I.getType(), 1.0), C, DL);
+ if (!RecipC || !RecipC->isNormalFP())
return nullptr;
// X / C --> X * (1 / C)
@@ -1256,12 +1265,13 @@ static Instruction *foldFDivConstantDividend(BinaryOperator &I) {
// Try to reassociate C / X expressions where X includes another constant.
Constant *C2, *NewC = nullptr;
+ const DataLayout &DL = I.getModule()->getDataLayout();
if (match(I.getOperand(1), m_FMul(m_Value(X), m_Constant(C2)))) {
// C / (X * C2) --> (C / C2) / X
- NewC = ConstantExpr::getFDiv(C, C2);
+ NewC = ConstantFoldBinaryOpOperands(Instruction::FDiv, C, C2, DL);
} else if (match(I.getOperand(1), m_FDiv(m_Value(X), m_Constant(C2)))) {
// C / (X / C2) --> (C * C2) / X
- NewC = ConstantExpr::getFMul(C, C2);
+ NewC = ConstantFoldBinaryOpOperands(Instruction::FMul, C, C2, DL);
}
// Disallow denormal constants because we don't know what would happen
// on all targets.
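Unlike the removed ConstantExpr::getFMul/getFDiv calls, ConstantFoldBinaryOpOperands can return null when no target-independent folding is possible, hence the added null checks. The isNormalFP guards remain because a folded reciprocal can land in the denormal range; a small sketch of that case:

    #include <cmath>
    #include <cstdio>

    int main() {
      // foldFDivConstantDivisor rewrites X / C to X * (1 / C) only when the
      // reciprocal is a normal FP value, since denormals may be flushed.
      float C = 3.0e38f;
      float Recip = 1.0f / C; // ~3.3e-39, below the smallest normal float
      printf("normal: %d\n", std::fpclassify(Recip) == FP_NORMAL); // 0
      // So this divisor is not rewritten into a multiply.
      return 0;
    }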
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 9d4c01ac03e2..febd0f51d25f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -925,7 +925,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
*II, DemandedMask, Known, KnownBitsComputed);
if (V)
- return V.getValue();
+ return V.value();
break;
}
}
@@ -1636,7 +1636,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
*II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
simplifyAndSetOp);
if (V)
- return V.getValue();
+ return V.value();
break;
}
} // switch on IntrinsicID
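These getValue()/value() substitutions, here and throughout the rest of the commit, are mechanical: llvm::Optional gained std::optional-style accessors, and value() is now the preferred spelling. A one-line illustration (assuming an LLVM tree at this revision):

    #include "llvm/ADT/Optional.h"

    int demo(llvm::Optional<int> O) {
      return O ? O.value() : -1; // previously O.getValue()
    }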
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 22659a8e4951..b80c58183dd5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -228,8 +228,9 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
// truncate a subset of scalar bits of an insert op.
if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
Value *Scalar;
+ Value *Vec;
uint64_t InsIndexC;
- if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar),
+ if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
m_ConstantInt(InsIndexC))))
return nullptr;
@@ -239,8 +240,19 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
// of elements 4-7 of the bitcasted vector.
unsigned NarrowingRatio =
NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
- if (ExtIndexC / NarrowingRatio != InsIndexC)
+
+ if (ExtIndexC / NarrowingRatio != InsIndexC) {
+      // Remove the insertelement if the inserted element is not used.
+      // extractelement (bitcast (insertelement (Vec, b)), a) ->
+      // extractelement (bitcast (Vec), a)
+      // FIXME: this should be moved to SimplifyDemandedVectorElts,
+      // once scalable vectors are supported.
+ if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
+ Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
+ return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
+ }
return nullptr;
+ }
// We are extracting part of the original scalar. How that scalar is
// inserted into the vector depends on the endian-ness. Example:
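The added early-out applies when the extracted narrow element cannot overlap the inserted scalar. A small index computation shows when that happens, for a hypothetical <2 x i64> source bitcast to <8 x i16>:

    #include <cstdio>

    int main() {
      unsigned NumSrcElts = 2, NumElts = 8;
      unsigned NarrowingRatio = NumElts / NumSrcElts; // 4
      // The insert writes source element 0; extracting narrow element 5
      // reads source element 5 / 4 = 1, so the insert is irrelevant.
      unsigned ExtIndexC = 5, InsIndexC = 0;
      printf("overlaps: %d\n", ExtIndexC / NarrowingRatio == InsIndexC); // 0
      // In that case the extract can be rewritten onto the original vector.
      return 0;
    }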
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 0816a4a575d9..75520a0c8d5f 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -523,11 +523,12 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
// Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
// if C1 and C2 are constants.
Value *A, *B;
- Constant *C1, *C2;
+ Constant *C1, *C2, *CRes;
if (Op0 && Op1 &&
Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
- match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
+ match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2)))) &&
+ (CRes = ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL))) {
bool IsNUW = hasNoUnsignedWrap(I) &&
hasNoUnsignedWrap(*Op0) &&
hasNoUnsignedWrap(*Op1);
@@ -544,7 +545,7 @@ bool InstCombinerImpl::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
InsertNewInstWith(NewBO, I);
NewBO->takeName(Op1);
replaceOperand(I, 0, NewBO);
- replaceOperand(I, 1, ConstantExpr::get(Opcode, C1, C2));
+ replaceOperand(I, 1, CRes);
// Conservatively clear the optional flags, since they may not be
// preserved by the reassociation.
ClearSubclassDataAfterReassociation(I);
@@ -1324,6 +1325,11 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
return nullptr;
+ // Fold constants for the predecessor block with constant incoming values.
+ Constant *NewC = ConstantFoldBinaryOpOperands(BO.getOpcode(), C0, C1, DL);
+ if (!NewC)
+ return nullptr;
+
// Make a new binop in the predecessor block with the non-constant incoming
// values.
Builder.SetInsertPoint(PredBlockBranch);
@@ -1333,9 +1339,6 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
NotFoldedNewBO->copyIRFlags(&BO);
- // Fold constants for the predecessor block with constant incoming values.
- Constant *NewC = ConstantExpr::get(BO.getOpcode(), C0, C1);
-
// Replace the binop with a phi of the new values. The old phis are dead.
PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
NewPhi->addIncoming(NewBO, OtherBB);
@@ -1774,9 +1777,10 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
// for target-independent shuffle creation.
if (I >= SrcVecNumElts || ShMask[I] < 0) {
Constant *MaybeUndef =
- ConstOp1 ? ConstantExpr::get(Opcode, UndefScalar, CElt)
- : ConstantExpr::get(Opcode, CElt, UndefScalar);
- if (!match(MaybeUndef, m_Undef())) {
+ ConstOp1
+ ? ConstantFoldBinaryOpOperands(Opcode, UndefScalar, CElt, DL)
+ : ConstantFoldBinaryOpOperands(Opcode, CElt, UndefScalar, DL);
+ if (!MaybeUndef || !match(MaybeUndef, m_Undef())) {
MayChange = false;
break;
}
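At its core the transform is plain reassociation with the two constants folded together; the change only ensures that fold is known to succeed before any IR is rewritten. A worked integer instance (illustrative values):

    #include <cassert>

    int main() {
      int a = 1, b = 2;
      // "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" with op = add:
      // (a + 3) + (b + 5) --> (a + b) + 8, where 8 is the folded CRes.
      assert((a + 3) + (b + 5) == (a + b) + 8);
      return 0;
    }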
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 7a5a74aa4fff..4fed4bd18fb1 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -101,6 +101,7 @@ static const uint64_t kSmallX86_64ShadowOffsetAlignMask = ~0xFFFULL;
static const uint64_t kLinuxKasan_ShadowOffset64 = 0xdffffc0000000000;
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 44;
static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52;
+static const uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29;
static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000;
static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37;
static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
@@ -476,6 +477,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
TargetTriple.getArch() == Triple::ppc64le;
bool IsSystemZ = TargetTriple.getArch() == Triple::systemz;
bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
+ bool IsMIPSN32ABI = TargetTriple.getEnvironment() == Triple::GNUABIN32;
bool IsMIPS32 = TargetTriple.isMIPS32();
bool IsMIPS64 = TargetTriple.isMIPS64();
bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb();
@@ -496,6 +498,8 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
if (LongSize == 32) {
if (IsAndroid)
Mapping.Offset = kDynamicShadowSentinel;
+ else if (IsMIPSN32ABI)
+ Mapping.Offset = kMIPS_ShadowOffsetN32;
else if (IsMIPS32)
Mapping.Offset = kMIPS32_ShadowOffset32;
else if (IsFreeBSD)
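With ASan's default mapping scale of 3, shadow addresses are computed as (Addr >> 3) + Offset; the new constant simply selects 1 << 29 as that offset for the MIPS N32 ABI. A sketch with an illustrative address:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29;
      const unsigned kDefaultShadowScale = 3;
      uint64_t Addr = 0x10008000; // hypothetical application address
      uint64_t Shadow = (Addr >> kDefaultShadowScale) + kMIPS_ShadowOffsetN32;
      printf("shadow = 0x%llx\n", (unsigned long long)Shadow);
      return 0;
    }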
diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index b11b84d65d23..57c491436b93 100644
--- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -39,7 +39,8 @@ addModuleFlags(Module &M,
Nodes.push_back(MDNode::get(Context, Vals));
}
- M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
+ M.addModuleFlag(Module::Append, "CG Profile",
+ MDTuple::getDistinct(Context, Nodes));
return true;
}
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 218b4bbfb6c0..b01c74320380 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -180,11 +180,31 @@ static cl::opt<bool> ClWithTls(
"platforms that support this"),
cl::Hidden, cl::init(true));
-static cl::opt<bool>
- ClRecordStackHistory("hwasan-record-stack-history",
- cl::desc("Record stack frames with tagged allocations "
- "in a thread-local ring buffer"),
- cl::Hidden, cl::init(true));
+// Mode for selecting how to insert frame record info into the stack ring
+// buffer.
+enum RecordStackHistoryMode {
+ // Do not record frame record info.
+ none,
+
+ // Insert instructions into the prologue for storing into the stack ring
+ // buffer directly.
+ instr,
+
+ // Add a call to __hwasan_add_frame_record in the runtime.
+ libcall,
+};
+
+static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
+ "hwasan-record-stack-history",
+ cl::desc("Record stack frames with tagged allocations in a thread-local "
+ "ring buffer"),
+ cl::values(clEnumVal(none, "Do not record stack ring history"),
+ clEnumVal(instr, "Insert instructions into the prologue for "
+ "storing into the stack ring buffer directly"),
+ clEnumVal(libcall, "Add a call to __hwasan_add_frame_record for "
+ "storing into the stack ring buffer")),
+ cl::Hidden, cl::init(instr));
+
static cl::opt<bool>
ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
cl::desc("instrument memory intrinsics"),
@@ -313,6 +333,7 @@ public:
Value *getPC(IRBuilder<> &IRB);
Value *getSP(IRBuilder<> &IRB);
+ Value *getFrameRecordInfo(IRBuilder<> &IRB);
void instrumentPersonalityFunctions();
@@ -378,6 +399,7 @@ private:
FunctionCallee HwasanTagMemoryFunc;
FunctionCallee HwasanGenerateTagFunc;
+ FunctionCallee HwasanRecordFrameRecordFunc;
Constant *ShadowGlobal;
@@ -629,6 +651,9 @@ void HWAddressSanitizer::initializeCallbacks(Module &M) {
HwasanGenerateTagFunc =
M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
+ HwasanRecordFrameRecordFunc = M.getOrInsertFunction(
+ "__hwasan_add_frame_record", IRB.getVoidTy(), Int64Ty);
+
ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
ArrayType::get(IRB.getInt8Ty(), 0));
@@ -1132,6 +1157,21 @@ Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) {
return CachedSP;
}
+Value *HWAddressSanitizer::getFrameRecordInfo(IRBuilder<> &IRB) {
+ // Prepare ring buffer data.
+ Value *PC = getPC(IRB);
+ Value *SP = getSP(IRB);
+
+ // Mix SP and PC.
+ // Assumptions:
+ // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
+ // SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
+ // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
+ // 0xSSSSPPPPPPPPPPPP
+ SP = IRB.CreateShl(SP, 44);
+ return IRB.CreateOr(PC, SP);
+}
+
void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
if (!Mapping.InTls)
ShadowBase = getShadowNonTls(IRB);
@@ -1141,50 +1181,67 @@ void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
if (!WithFrameRecord && ShadowBase)
return;
- Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
- assert(SlotPtr);
-
- Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
- // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
- Value *ThreadLongMaybeUntagged =
- TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
+ Value *SlotPtr = nullptr;
+ Value *ThreadLong = nullptr;
+ Value *ThreadLongMaybeUntagged = nullptr;
+
+ auto getThreadLongMaybeUntagged = [&]() {
+ if (!SlotPtr)
+ SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
+ if (!ThreadLong)
+ ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
+ // Extract the address field from ThreadLong. Unnecessary on AArch64 with
+ // TBI.
+ return TargetTriple.isAArch64() ? ThreadLong
+ : untagPointer(IRB, ThreadLong);
+ };
if (WithFrameRecord) {
- StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
-
- // Prepare ring buffer data.
- Value *PC = getPC(IRB);
- Value *SP = getSP(IRB);
-
- // Mix SP and PC.
- // Assumptions:
- // PC is 0x0000PPPPPPPPPPPP (48 bits are meaningful, others are zero)
- // SP is 0xsssssssssssSSSS0 (4 lower bits are zero)
- // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
- // 0xSSSSPPPPPPPPPPPP
- SP = IRB.CreateShl(SP, 44);
-
- // Store data to ring buffer.
- Value *RecordPtr =
- IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
- IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
-
- // Update the ring buffer. Top byte of ThreadLong defines the size of the
- // buffer in pages, it must be a power of two, and the start of the buffer
- // must be aligned by twice that much. Therefore wrap around of the ring
- // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
- // The use of AShr instead of LShr is due to
- // https://bugs.llvm.org/show_bug.cgi?id=39030
- // Runtime library makes sure not to use the highest bit.
- Value *WrapMask = IRB.CreateXor(
- IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
- ConstantInt::get(IntptrTy, (uint64_t)-1));
- Value *ThreadLongNew = IRB.CreateAnd(
- IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
- IRB.CreateStore(ThreadLongNew, SlotPtr);
+ switch (ClRecordStackHistory) {
+ case libcall: {
+ // Emit a runtime call into hwasan rather than emitting instructions for
+ // recording stack history.
+ Value *FrameRecordInfo = getFrameRecordInfo(IRB);
+ IRB.CreateCall(HwasanRecordFrameRecordFunc, {FrameRecordInfo});
+ break;
+ }
+ case instr: {
+ ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
+
+ StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
+
+ // Store data to ring buffer.
+ Value *FrameRecordInfo = getFrameRecordInfo(IRB);
+ Value *RecordPtr = IRB.CreateIntToPtr(ThreadLongMaybeUntagged,
+ IntptrTy->getPointerTo(0));
+ IRB.CreateStore(FrameRecordInfo, RecordPtr);
+
+ // Update the ring buffer. Top byte of ThreadLong defines the size of the
+ // buffer in pages, it must be a power of two, and the start of the buffer
+ // must be aligned by twice that much. Therefore wrap around of the ring
+ // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
+ // The use of AShr instead of LShr is due to
+ // https://bugs.llvm.org/show_bug.cgi?id=39030
+ // Runtime library makes sure not to use the highest bit.
+ Value *WrapMask = IRB.CreateXor(
+ IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
+ ConstantInt::get(IntptrTy, (uint64_t)-1));
+ Value *ThreadLongNew = IRB.CreateAnd(
+ IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
+ IRB.CreateStore(ThreadLongNew, SlotPtr);
+ break;
+ }
+ case none: {
+ llvm_unreachable(
+ "A stack history recording mode should've been selected.");
+ }
+ }
}
if (!ShadowBase) {
+ if (!ThreadLongMaybeUntagged)
+ ThreadLongMaybeUntagged = getThreadLongMaybeUntagged();
+
// Get shadow base address by aligning RecordPtr up.
// Note: this is not correct if the pointer is already aligned.
// Runtime library will make sure this never happens.
@@ -1408,7 +1465,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F,
Instruction *InsertPt = &*F.getEntryBlock().begin();
IRBuilder<> EntryIRB(InsertPt);
emitPrologue(EntryIRB,
- /*WithFrameRecord*/ ClRecordStackHistory &&
+ /*WithFrameRecord*/ ClRecordStackHistory != none &&
Mapping.WithFrameRecord &&
!SInfo.AllocasToInstrument.empty());
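Both the frame record and the ring-buffer advance are plain integer arithmetic, which is what makes the new libcall mode a drop-in alternative: the same packed value is either stored directly (instr) or passed to __hwasan_add_frame_record (libcall). A sketch with made-up inputs:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // getFrameRecordInfo: pack ~20 low SP bits above a 48-bit PC,
      // giving 0xSSSSPPPPPPPPPPPP.
      uint64_t PC = 0x0000400000001234ULL, SP = 0x0000ffffc00ff120ULL;
      uint64_t Record = PC | (SP << 44);

      // Ring-buffer advance: the top byte of ThreadLong encodes the buffer
      // size in pages, so wrap-around is (Addr + 8) & ~(Size << 12).
      uint64_t ThreadLong = (2ULL << 56) | 0x7000000000ULL; // hypothetical
      uint64_t WrapMask = ~((ThreadLong >> 56) << 12); // real code uses AShr
      uint64_t ThreadLongNew = (ThreadLong + 8) & WrapMask;
      printf("record=%016llx next=%016llx\n",
             (unsigned long long)Record, (unsigned long long)ThreadLongNew);
      return 0;
    }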
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 7843b1522830..3572cb3b50e2 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -1244,6 +1244,7 @@ bool InstrProfiling::emitRuntimeHook() {
auto *Var =
new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, getInstrProfRuntimeHookVarName());
+ Var->setVisibility(GlobalValue::HiddenVisibility);
if (TT.isOSBinFormatELF() && !TT.isPS()) {
// Mark the user variable as used so that it isn't stripped out.
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index c33b1b3b1a5c..d4aa31db8337 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -486,7 +486,7 @@ static bool isTsanAtomic(const Instruction *I) {
if (!SSID)
return false;
if (isa<LoadInst>(I) || isa<StoreInst>(I))
- return SSID.getValue() != SyncScope::SingleThread;
+ return SSID.value() != SyncScope::SingleThread;
return true;
}
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 8a1761505d59..fe6f9486ab0c 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -611,9 +611,9 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S,
ConstCand->ConstInt->getValue());
if (Diff) {
const InstructionCost ImmCosts =
- TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.getValue(), Ty);
+ TTI->getIntImmCodeSizeCost(Opcode, OpndIdx, Diff.value(), Ty);
Cost -= ImmCosts;
- LLVM_DEBUG(dbgs() << "Offset " << Diff.getValue() << " "
+ LLVM_DEBUG(dbgs() << "Offset " << Diff.value() << " "
<< "has penalty: " << ImmCosts << "\n"
<< "Adjusted cost: " << Cost << "\n");
}
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 783301fe589e..b460637b7d88 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -748,14 +748,14 @@ void GVNPass::printPipeline(
OS << "<";
if (Options.AllowPRE != None)
- OS << (Options.AllowPRE.getValue() ? "" : "no-") << "pre;";
+ OS << (Options.AllowPRE.value() ? "" : "no-") << "pre;";
if (Options.AllowLoadPRE != None)
- OS << (Options.AllowLoadPRE.getValue() ? "" : "no-") << "load-pre;";
+ OS << (Options.AllowLoadPRE.value() ? "" : "no-") << "load-pre;";
if (Options.AllowLoadPRESplitBackedge != None)
- OS << (Options.AllowLoadPRESplitBackedge.getValue() ? "" : "no-")
+ OS << (Options.AllowLoadPRESplitBackedge.value() ? "" : "no-")
<< "split-backedge-load-pre;";
if (Options.AllowMemDep != None)
- OS << (Options.AllowMemDep.getValue() ? "" : "no-") << "memdep";
+ OS << (Options.AllowMemDep.value() ? "" : "no-") << "memdep";
OS << ">";
}
@@ -1059,8 +1059,8 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
if (DT->dominates(cast<Instruction>(OtherAccess), cast<Instruction>(U)))
OtherAccess = U;
else
- assert(DT->dominates(cast<Instruction>(U),
- cast<Instruction>(OtherAccess)));
+ assert(U == OtherAccess || DT->dominates(cast<Instruction>(U),
+ cast<Instruction>(OtherAccess)));
} else
OtherAccess = U;
}
@@ -1494,14 +1494,6 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- // FIXME: Can we support the fallthrough edge?
- if (isa<CallBrInst>(Pred->getTerminator())) {
- LLVM_DEBUG(
- dbgs() << "COULD NOT PRE LOAD BECAUSE OF CALLBR CRITICAL EDGE '"
- << Pred->getName() << "': " << *Load << '\n');
- return false;
- }
-
if (LoadBB->isEHPad()) {
LLVM_DEBUG(
dbgs() << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '"
@@ -2875,11 +2867,6 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) {
if (isa<IndirectBrInst>(PREPred->getTerminator()))
return false;
- // Don't do PRE across callbr.
- // FIXME: Can we do this across the fallthrough edge?
- if (isa<CallBrInst>(PREPred->getTerminator()))
- return false;
-
// We can't do PRE safely on a critical edge, so instead we schedule
// the edge to be split and perform the PRE the next time we iterate
// on the function.
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index e977dd18be9f..a9ca0bdc8f7b 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -106,13 +106,18 @@ static cl::opt<bool> VerifyIndvars(
static cl::opt<ReplaceExitVal> ReplaceExitValue(
"replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
- cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
- clEnumValN(OnlyCheapRepl, "cheap",
- "only replace exit value when the cost is cheap"),
- clEnumValN(NoHardUse, "noharduse",
- "only replace exit values when loop def likely dead"),
- clEnumValN(AlwaysRepl, "always",
- "always replace exit value whenever possible")));
+ cl::values(
+ clEnumValN(NeverRepl, "never", "never replace exit value"),
+ clEnumValN(OnlyCheapRepl, "cheap",
+ "only replace exit value when the cost is cheap"),
+ clEnumValN(
+ UnusedIndVarInLoop, "unusedindvarinloop",
+ "only replace exit value when it is an unused "
+ "induction variable in the loop and has cheap replacement cost"),
+ clEnumValN(NoHardUse, "noharduse",
+ "only replace exit values when loop def likely dead"),
+ clEnumValN(AlwaysRepl, "always",
+ "always replace exit value whenever possible")));
static cl::opt<bool> UsePostIncrementRanges(
"indvars-post-increment-ranges", cl::Hidden,
@@ -1302,15 +1307,39 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
}
static void replaceLoopPHINodesWithPreheaderValues(
- Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
auto *LoopPreheader = L->getLoopPreheader();
auto *LoopHeader = L->getHeader();
+ SmallVector<Instruction *> Worklist;
for (auto &PN : LoopHeader->phis()) {
auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
+ for (User *U : PN.users())
+ Worklist.push_back(cast<Instruction>(U));
PN.replaceAllUsesWith(PreheaderIncoming);
DeadInsts.emplace_back(&PN);
}
+
+ // Replacing with the preheader value will often allow IV users to simplify
+ // (especially if the preheader value is a constant).
+ SmallPtrSet<Instruction *, 16> Visited;
+ while (!Worklist.empty()) {
+ auto *I = cast<Instruction>(Worklist.pop_back_val());
+ if (!Visited.insert(I).second)
+ continue;
+
+ // Don't simplify instructions outside the loop.
+ if (!L->contains(I))
+ continue;
+
+ Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
+ if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
+ for (User *U : I->users())
+ Worklist.push_back(cast<Instruction>(U));
+ I->replaceAllUsesWith(Res);
+ DeadInsts.emplace_back(I);
+ }
+ }
}
static void replaceWithInvariantCond(
@@ -1549,14 +1578,19 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (!BI)
return true;
- // If already constant, nothing to do.
- if (isa<Constant>(BI->getCondition()))
- return true;
-
// Likewise, the loop latch must be dominated by the exiting BB.
if (!DT->dominates(ExitingBB, L->getLoopLatch()))
return true;
+ if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
+ // If already constant, nothing to do. However, if this is an
+ // unconditional exit, we can still replace header phis with their
+ // preheader value.
+ if (!L->contains(BI->getSuccessor(CI->isNullValue())))
+ replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts);
+ return true;
+ }
+
return false;
});
@@ -1640,7 +1674,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// the header PHIs with values coming from the preheader.
if (ExitCount->isZero()) {
foldExit(L, ExitingBB, true, DeadInsts);
- replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
+ replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts);
Changed = true;
continue;
}
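The new unusedindvarinloop strategy targets induction variables whose only in-loop uses are their own update, so carrying them through the loop buys nothing. Conceptually, at the source level (a sketch of the effect, not the pass's literal output):

    // Before: 'acc' is dead inside the loop except for its own increment.
    unsigned before(unsigned n) {
      unsigned acc = 0;
      for (unsigned j = 0; j < n; ++j)
        acc += 2;
      return acc; // the only real use is this exit value
    }

    // After exit-value rewriting with the closed form of the recurrence:
    unsigned after(unsigned n) {
      for (unsigned j = 0; j < n; ++j)
        ; // 'acc' and its update become dead and are deleted separately
      return 2 * n;
    }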
diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 799669a19796..b54cf5e7cb20 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -1710,7 +1710,7 @@ IntersectSignedRange(ScalarEvolution &SE,
return None;
if (!R1)
return R2;
- auto &R1Value = R1.getValue();
+ auto &R1Value = R1.value();
// We never return empty ranges from this function, and R1 is supposed to be
// a result of intersection. Thus, R1 is never empty.
assert(!R1Value.isEmpty(SE, /* IsSigned */ true) &&
@@ -1739,7 +1739,7 @@ IntersectUnsignedRange(ScalarEvolution &SE,
return None;
if (!R1)
return R2;
- auto &R1Value = R1.getValue();
+ auto &R1Value = R1.value();
// We never return empty ranges from this function, and R1 is supposed to be
// a result of intersection. Thus, R1 is never empty.
assert(!R1Value.isEmpty(SE, /* IsSigned */ false) &&
@@ -1950,13 +1950,12 @@ bool InductiveRangeCheckElimination::run(
LS.IsSignedPredicate);
if (Result) {
auto MaybeSafeIterRange =
- IntersectRange(SE, SafeIterRange, Result.getValue());
+ IntersectRange(SE, SafeIterRange, Result.value());
if (MaybeSafeIterRange) {
- assert(
- !MaybeSafeIterRange.getValue().isEmpty(SE, LS.IsSignedPredicate) &&
- "We should never return empty ranges!");
+ assert(!MaybeSafeIterRange.value().isEmpty(SE, LS.IsSignedPredicate) &&
+ "We should never return empty ranges!");
RangeChecksToEliminate.push_back(IRC);
- SafeIterRange = MaybeSafeIterRange.getValue();
+ SafeIterRange = MaybeSafeIterRange.value();
}
}
}
@@ -1964,8 +1963,7 @@ bool InductiveRangeCheckElimination::run(
if (!SafeIterRange)
return false;
- LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT,
- SafeIterRange.getValue());
+ LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT, SafeIterRange.value());
bool Changed = LC.run();
if (Changed) {
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 5caefc422921..b31eab50c5ec 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -1459,9 +1459,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// Add all the unavailable predecessors to the PredsToSplit list.
for (BasicBlock *P : predecessors(LoadBB)) {
// If the predecessor is an indirect goto, we can't split the edge.
- // Same for CallBr.
- if (isa<IndirectBrInst>(P->getTerminator()) ||
- isa<CallBrInst>(P->getTerminator()))
+ if (isa<IndirectBrInst>(P->getTerminator()))
return false;
if (!AvailablePredSet.count(P))
@@ -1685,9 +1683,8 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
}
// If the predecessor ends with an indirect goto, we can't change its
- // destination. Same for CallBr.
- if (isa<IndirectBrInst>(Pred->getTerminator()) ||
- isa<CallBrInst>(Pred->getTerminator()))
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
continue;
PredToDestList.emplace_back(Pred, DestBB);
@@ -1924,10 +1921,9 @@ bool JumpThreadingPass::processBranchOnXOR(BinaryOperator *BO) {
}
// If any of predecessors end with an indirect goto, we can't change its
- // destination. Same for CallBr.
+ // destination.
if (any_of(BlocksToFoldInto, [](BasicBlock *Pred) {
- return isa<IndirectBrInst>(Pred->getTerminator()) ||
- isa<CallBrInst>(Pred->getTerminator());
+ return isa<IndirectBrInst>(Pred->getTerminator());
}))
return false;
@@ -2173,6 +2169,9 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
BasicBlock *ZeroPred = nullptr;
BasicBlock *OnePred = nullptr;
for (BasicBlock *P : predecessors(PredBB)) {
+ // If PredPred ends with IndirectBrInst, we can't handle it.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ continue;
if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(
evaluateOnPredecessorEdge(BB, P, Cond))) {
if (CI->isZero()) {
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 492f4e40395a..f54264b1dca6 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1508,8 +1508,7 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
return false;
for (BasicBlock *BBPred : predecessors(BB)) {
- if (isa<IndirectBrInst>(BBPred->getTerminator()) ||
- isa<CallBrInst>(BBPred->getTerminator()))
+ if (isa<IndirectBrInst>(BBPred->getTerminator()))
return false;
}
return true;
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 03a10cb36bb6..b178bcae3b0e 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -602,7 +602,7 @@ private:
: LLVMLoopDistributeFollowupCoincident});
if (PartitionID) {
Loop *NewLoop = Part->getDistributedLoop();
- NewLoop->setLoopID(PartitionID.getValue());
+ NewLoop->setLoopID(PartitionID.value());
}
}
};
@@ -826,7 +826,7 @@ public:
{LLVMLoopDistributeFollowupAll,
LLVMLoopDistributeFollowupFallback},
"llvm.loop.distribute.", true)
- .getValue();
+ .value();
LVer.getNonVersionedLoop()->setLoopID(UnversionedLoopID);
}
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 88d6a7aff3c9..d908c151d9f2 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1483,7 +1483,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// anything where the alignment isn't at least the element size.
assert((StoreAlign && LoadAlign) &&
"Expect unordered load/store to have align.");
- if (StoreAlign.getValue() < StoreSize || LoadAlign.getValue() < StoreSize)
+ if (StoreAlign.value() < StoreSize || LoadAlign.value() < StoreSize)
return Changed;
// If the element.atomic memcpy is not lowered into explicit
@@ -1497,7 +1497,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// Note that unordered atomic loads/stores are *required* by the spec to
// have an alignment but non-atomic loads/stores may not.
NewCall = Builder.CreateElementUnorderedAtomicMemCpy(
- StoreBasePtr, StoreAlign.getValue(), LoadBasePtr, LoadAlign.getValue(),
+ StoreBasePtr, StoreAlign.value(), LoadBasePtr, LoadAlign.value(),
NumBytes, StoreSize, AATags.TBAA, AATags.TBAAStruct, AATags.Scope,
AATags.NoAlias);
}
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 9959e408e2e2..4ef7809c6681 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -5601,27 +5601,6 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
DeadInsts.emplace_back(OperandIsInstr);
}
-// Check if there are any loop exit values which are only used once within the
-// loop which may potentially be optimized with a call to rewriteLoopExitValue.
-static bool LoopExitValHasSingleUse(Loop *L) {
- BasicBlock *ExitBB = L->getExitBlock();
- if (!ExitBB)
- return false;
-
- for (PHINode &ExitPhi : ExitBB->phis()) {
- if (ExitPhi.getNumIncomingValues() != 1)
- break;
-
- BasicBlock *Pred = ExitPhi.getIncomingBlock(0);
- Value *IVNext = ExitPhi.getIncomingValueForBlock(Pred);
- // One use would be the exit phi node, and there should be only one other
- // use for this to be considered.
- if (IVNext->getNumUses() == 2)
- return true;
- }
- return false;
-}
-
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
@@ -6406,8 +6385,8 @@ static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
// less DWARF ops than an iteration count-based expression.
if (Optional<APInt> Offset =
SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
- if (Offset.getValue().getMinSignedBits() <= 64)
- SalvageExpr->createOffsetExpr(Offset.getValue().getSExtValue(),
+ if (Offset.value().getMinSignedBits() <= 64)
+ SalvageExpr->createOffsetExpr(Offset.value().getSExtValue(),
LSRInductionVar);
} else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
SE))
@@ -6627,12 +6606,12 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
// When this is the case, if the exit value of the IV can be calculated using
// SCEV, we can replace the exit block PHI with the final value of the IV and
// skip the updates in each loop iteration.
- if (L->isRecursivelyLCSSAForm(DT, LI) && LoopExitValHasSingleUse(L)) {
+ if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
SmallVector<WeakTrackingVH, 16> DeadInsts;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
SCEVExpander Rewriter(SE, DL, "lsr", false);
int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
- OnlyCheapRepl, DeadInsts);
+ UnusedIndVarInLoop, DeadInsts);
if (Rewrites) {
Changed = true;
RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 8c2868563227..64fcdfa15aa9 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -373,7 +373,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupRemainderInner});
if (NewInnerEpilogueLoopID)
- SubLoop->setLoopID(NewInnerEpilogueLoopID.getValue());
+ SubLoop->setLoopID(NewInnerEpilogueLoopID.value());
// Find trip count and trip multiple
BasicBlock *Latch = L->getLoopLatch();
@@ -403,14 +403,14 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupRemainderOuter});
if (NewOuterEpilogueLoopID)
- EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.getValue());
+ EpilogueOuterLoop->setLoopID(NewOuterEpilogueLoopID.value());
}
Optional<MDNode *> NewInnerLoopID =
makeFollowupLoopID(OrigOuterLoopID, {LLVMLoopUnrollAndJamFollowupAll,
LLVMLoopUnrollAndJamFollowupInner});
if (NewInnerLoopID)
- SubLoop->setLoopID(NewInnerLoopID.getValue());
+ SubLoop->setLoopID(NewInnerLoopID.value());
else
SubLoop->setLoopID(OrigSubLoopID);
@@ -419,7 +419,7 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
OrigOuterLoopID,
{LLVMLoopUnrollAndJamFollowupAll, LLVMLoopUnrollAndJamFollowupOuter});
if (NewOuterLoopID) {
- L->setLoopID(NewOuterLoopID.getValue());
+ L->setLoopID(NewOuterLoopID.value());
// Do not setLoopAlreadyUnrolled if a followup was given.
return UnrollResult;
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index fda86afe5f9d..de5833f60adc 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1324,7 +1324,7 @@ static LoopUnrollResult tryToUnrollLoop(
makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
LLVMLoopUnrollFollowupRemainder});
if (RemainderLoopID)
- RemainderLoop->setLoopID(RemainderLoopID.getValue());
+ RemainderLoop->setLoopID(RemainderLoopID.value());
}
if (UnrollResult != LoopUnrollResult::FullyUnrolled) {
@@ -1332,7 +1332,7 @@ static LoopUnrollResult tryToUnrollLoop(
makeFollowupLoopID(OrigLoopID, {LLVMLoopUnrollFollowupAll,
LLVMLoopUnrollFollowupUnrolled});
if (NewLoopID) {
- L->setLoopID(NewLoopID.getValue());
+ L->setLoopID(NewLoopID.value());
// Do not setLoopAlreadyUnrolled if loop attributes have been specified
// explicitly.
@@ -1645,15 +1645,15 @@ void LoopUnrollPass::printPipeline(
OS, MapClassName2PassName);
OS << "<";
if (UnrollOpts.AllowPartial != None)
- OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;";
+ OS << (UnrollOpts.AllowPartial.value() ? "" : "no-") << "partial;";
if (UnrollOpts.AllowPeeling != None)
- OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;";
+ OS << (UnrollOpts.AllowPeeling.value() ? "" : "no-") << "peeling;";
if (UnrollOpts.AllowRuntime != None)
- OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;";
+ OS << (UnrollOpts.AllowRuntime.value() ? "" : "no-") << "runtime;";
if (UnrollOpts.AllowUpperBound != None)
- OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;";
+ OS << (UnrollOpts.AllowUpperBound.value() ? "" : "no-") << "upperbound;";
if (UnrollOpts.AllowProfileBasedPeeling != None)
- OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? "" : "no-")
+ OS << (UnrollOpts.AllowProfileBasedPeeling.value() ? "" : "no-")
<< "profile-peeling;";
if (UnrollOpts.FullUnrollMaxCount != None)
OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";";
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index da1737979305..75f0896d4845 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -1929,11 +1930,23 @@ Value *ReassociatePass::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
+ const DataLayout &DL = I->getModule()->getDataLayout();
Constant *Cst = nullptr;
unsigned Opcode = I->getOpcode();
- while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
- Constant *C = cast<Constant>(Ops.pop_back_val().Op);
- Cst = Cst ? ConstantExpr::get(Opcode, C, Cst) : C;
+ while (!Ops.empty()) {
+ if (auto *C = dyn_cast<Constant>(Ops.back().Op)) {
+ if (!Cst) {
+ Ops.pop_back();
+ Cst = C;
+ continue;
+ }
+ if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, C, Cst, DL)) {
+ Ops.pop_back();
+ Cst = Res;
+ continue;
+ }
+ }
+ break;
}
// If there was nothing but constants then we are done.
if (Ops.empty())
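The rewritten loop accumulates trailing constants only as long as target-independent folding keeps succeeding, instead of unconditionally wrapping them in a ConstantExpr. On integers the accumulation is ordinary arithmetic (illustrative values):

    #include <cassert>

    int main() {
      // Linearized multiply operands [x, 4, 8]: the two trailing constants
      // fold into a single Cst = 32, leaving "x * 32".
      int x = 3;
      assert(x * 4 * 8 == x * 32);
      return 0;
    }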
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index e9983ff82176..079b2fc973b9 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -769,8 +769,7 @@ llvm::SplitAllCriticalEdges(Function &F,
unsigned NumBroken = 0;
for (BasicBlock &BB : F) {
Instruction *TI = BB.getTerminator();
- if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI) &&
- !isa<CallBrInst>(TI))
+ if (TI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(TI))
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
if (SplitCriticalEdge(TI, i, Options))
++NumBroken;
@@ -1132,9 +1131,7 @@ SplitBlockPredecessorsImpl(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
// all BlockAddress uses would need to be updated.
assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
"Cannot split an edge from an IndirectBrInst");
- assert(!isa<CallBrInst>(Preds[i]->getTerminator()) &&
- "Cannot split an edge from a CallBrInst");
- Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
+ Preds[i]->getTerminator()->replaceSuccessorWith(BB, NewBB);
}
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 0b36e8708a03..9c595401ce29 100644
--- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -129,8 +129,7 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
SmallVector<BasicBlock *, 4> LoopPreds;
// Check if extra modifications will be required to preserve loop-simplify
// form after splitting. If it would require splitting blocks with IndirectBr
- // or CallBr terminators, bail out if preserving loop-simplify form is
- // requested.
+ // terminators, bail out if preserving loop-simplify form is requested.
if (LI) {
if (Loop *TIL = LI->getLoopFor(TIBB)) {
@@ -156,10 +155,7 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
// Loop-simplify form can be preserved, if we can split all in-loop
// predecessors.
if (any_of(LoopPreds, [](BasicBlock *Pred) {
- const Instruction *T = Pred->getTerminator();
- if (const auto *CBR = dyn_cast<CallBrInst>(T))
- return CBR->getDefaultDest() != Pred;
- return isa<IndirectBrInst>(T);
+ return isa<IndirectBrInst>(Pred->getTerminator());
})) {
if (Options.PreserveLoopSimplify)
return nullptr;
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index f94d854f7ee8..421f1f329f07 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -927,6 +927,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::AlwaysInline:
case Attribute::Cold:
case Attribute::DisableSanitizerInstrumentation:
+ case Attribute::FnRetThunkExtern:
case Attribute::Hot:
case Attribute::NoRecurse:
case Attribute::InlineHint:
@@ -1777,7 +1778,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
if (Count)
newFunction->setEntryCount(
- ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME
+ ProfileCount(Count.value(), Function::PCT_Real)); // FIXME
BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
}
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 205f7a7d9ed2..24126b5ab67b 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -961,8 +961,13 @@ createDebugifyFunctionPass(enum DebugifyMode Mode,
}
PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
- applyDebugifyMetadata(M, M.functions(),
- "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ applyDebugifyMetadata(M, M.functions(),
+ "ModuleDebugify: ", /*ApplyToMF*/ nullptr);
+ else
+ collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
+ "ModuleDebugify (original debuginfo)",
+ NameOfWrappedPass);
return PreservedAnalyses::all();
}
@@ -992,8 +997,14 @@ FunctionPass *createCheckDebugifyFunctionPass(
PreservedAnalyses NewPMCheckDebugifyPass::run(Module &M,
ModuleAnalysisManager &) {
- checkDebugifyMetadata(M, M.functions(), "", "CheckModuleDebugify", false,
- nullptr);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, M.functions(), NameOfWrappedPass,
+ "CheckModuleDebugify", Strip, StatsMap);
+ else
+ checkDebugInfoMetadata(
+ M, M.functions(), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)", NameOfWrappedPass,
+ OrigDIVerifyBugsReportFilePath);
return PreservedAnalyses::all();
}
@@ -1006,13 +1017,15 @@ static bool isIgnoredPass(StringRef PassID) {
void DebugifyEachInstrumentation::registerCallbacks(
PassInstrumentationCallbacks &PIC) {
- PIC.registerBeforeNonSkippedPassCallback([](StringRef P, Any IR) {
+ PIC.registerBeforeNonSkippedPassCallback([this](StringRef P, Any IR) {
if (isIgnoredPass(P))
return;
if (any_isa<const Function *>(IR))
- applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR)));
+ applyDebugify(*const_cast<Function *>(any_cast<const Function *>(IR)),
+ Mode, DebugInfoBeforePass, P);
else if (any_isa<const Module *>(IR))
- applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR)));
+ applyDebugify(*const_cast<Module *>(any_cast<const Module *>(IR)),
+ Mode, DebugInfoBeforePass, P);
});
PIC.registerAfterPassCallback([this](StringRef P, Any IR,
const PreservedAnalyses &PassPA) {
@@ -1022,12 +1035,24 @@ void DebugifyEachInstrumentation::registerCallbacks(
auto &F = *const_cast<Function *>(any_cast<const Function *>(IR));
Module &M = *F.getParent();
auto It = F.getIterator();
- checkDebugifyMetadata(M, make_range(It, std::next(It)), P,
- "CheckFunctionDebugify", /*Strip=*/true, &StatsMap);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, make_range(It, std::next(It)), P,
+ "CheckFunctionDebugify", /*Strip=*/true, DIStatsMap);
+ else
+ checkDebugInfoMetadata(
+ M, make_range(It, std::next(It)), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)",
+ P, OrigDIVerifyBugsReportFilePath);
} else if (any_isa<const Module *>(IR)) {
auto &M = *const_cast<Module *>(any_cast<const Module *>(IR));
- checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
- /*Strip=*/true, &StatsMap);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
+ /*Strip=*/true, DIStatsMap);
+ else
+ checkDebugInfoMetadata(
+ M, M.functions(), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)",
+ P, OrigDIVerifyBugsReportFilePath);
}
});
}
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index cd3b6c1a095a..023a0afd329b 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -402,7 +402,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
Optional<MDNode *> NewLoopID = makeFollowupLoopID(
LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
if (NewLoopID) {
- NewLoop->setLoopID(NewLoopID.getValue());
+ NewLoop->setLoopID(NewLoopID.value());
// Do not setLoopAlreadyUnrolled if loop attributes have been defined
// explicitly.
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index ec898c463574..82f993b4ceab 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -75,9 +75,6 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
if (isa<IndirectBrInst>(PredBB->getTerminator()))
// We cannot rewrite exiting edges from an indirectbr.
return false;
- if (isa<CallBrInst>(PredBB->getTerminator()))
- // We cannot rewrite exiting edges from a callbr.
- return false;
InLoopPredecessors.push_back(PredBB);
} else {
@@ -359,7 +356,7 @@ TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
Optional<int> Count =
getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
if (Count)
- return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+ return Count.value() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable"))
return TM_ForcedByUser;
@@ -380,7 +377,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
Optional<int> Count =
getOptionalIntLoopAttribute(L, "llvm.loop.unroll_and_jam.count");
if (Count)
- return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
+ return Count.value() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.enable"))
return TM_ForcedByUser;
@@ -1246,6 +1243,20 @@ static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet)
return true;
}
+/// Checks if it is safe to call InductionDescriptor::isInductionPHI for \p Phi,
+/// and returns true if this Phi is an induction phi in the loop. When
+/// isInductionPHI returns true, \p ID will also be set by isInductionPHI.
+static bool checkIsIndPhi(PHINode *Phi, Loop *L, ScalarEvolution *SE,
+ InductionDescriptor &ID) {
+ if (!Phi)
+ return false;
+ if (!L->getLoopPreheader())
+ return false;
+ if (Phi->getParent() != L->getHeader())
+ return false;
+ return InductionDescriptor::isInductionPHI(Phi, L, SE, ID);
+}
+
int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
ScalarEvolution *SE,
const TargetTransformInfo *TTI,
@@ -1297,6 +1308,46 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
if (!L->contains(Inst))
continue;
+  // Find exit values which are induction variables in the loop, and are
+  // otherwise unused in the loop, their only uses being the exit block
+  // PHI node and the induction variable update binary operator.
+ // The exit value can be replaced with the final value when it is cheap
+ // to do so.
+ if (ReplaceExitValue == UnusedIndVarInLoop) {
+ InductionDescriptor ID;
+ PHINode *IndPhi = dyn_cast<PHINode>(Inst);
+ if (IndPhi) {
+ if (!checkIsIndPhi(IndPhi, L, SE, ID))
+ continue;
+ // This is an induction PHI. Check that the only users are PHI
+ // nodes, and induction variable update binary operators.
+ if (llvm::any_of(Inst->users(), [&](User *U) {
+ if (!isa<PHINode>(U) && !isa<BinaryOperator>(U))
+ return true;
+ BinaryOperator *B = dyn_cast<BinaryOperator>(U);
+ if (B && B != ID.getInductionBinOp())
+ return true;
+ return false;
+ }))
+ continue;
+ } else {
+ // If it is not an induction phi, it must be an induction update
+ // binary operator with an induction phi user.
+ BinaryOperator *B = dyn_cast<BinaryOperator>(Inst);
+ if (!B)
+ continue;
+ if (llvm::any_of(Inst->users(), [&](User *U) {
+ PHINode *Phi = dyn_cast<PHINode>(U);
+ if (Phi != PN && !checkIsIndPhi(Phi, L, SE, ID))
+ return true;
+ return false;
+ }))
+ continue;
+ if (B != ID.getInductionBinOp())
+ continue;
+ }
+ }
+
// Okay, this instruction has a user outside of the current loop
// and varies predictably *inside* the loop. Evaluate the value it
// contains when the loop exits, if possible. We prefer to start with
@@ -1362,7 +1413,9 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
// Only do the rewrite when the ExitValue can be expanded cheaply.
// If LoopCanBeDel is true, rewrite exit value aggressively.
- if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost)
+ if ((ReplaceExitValue == OnlyCheapRepl ||
+ ReplaceExitValue == UnusedIndVarInLoop) &&
+ !LoopCanBeDel && Phi.HighCost)
continue;
Value *ExitVal = Rewriter.expandCodeFor(
diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
index 8641581c8039..9914a5ca6c5e 100644
--- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
@@ -74,6 +74,10 @@ Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op,
return Builder.CreateFAdd(Loaded, Inc, "new");
case AtomicRMWInst::FSub:
return Builder.CreateFSub(Loaded, Inc, "new");
+ case AtomicRMWInst::FMax:
+ return Builder.CreateMaxNum(Loaded, Inc);
+ case AtomicRMWInst::FMin:
+ return Builder.CreateMinNum(Loaded, Inc);
default:
llvm_unreachable("Unknown atomic op");
}
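CreateMaxNum/CreateMinNum emit llvm.maxnum/llvm.minnum, which follow IEEE-754 maxNum/minNum semantics: a quiet NaN loses to a number. C's fmax/fmin behave the same way, so the per-iteration value for the FMax case acts like this sketch:

    #include <cmath>
    #include <cstdio>

    float lowered_fmax_step(float Loaded, float Inc) {
      // Stand-in for Builder.CreateMaxNum(Loaded, Inc) in the RMW expansion.
      return std::fmax(Loaded, Inc);
    }

    int main() {
      printf("%g\n", lowered_fmax_step(1.0f, NAN));  // 1 (NaN loses)
      printf("%g\n", lowered_fmax_step(1.0f, 2.5f)); // 2.5
      return 0;
    }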
diff --git a/llvm/lib/Transforms/Utils/MisExpect.cpp b/llvm/lib/Transforms/Utils/MisExpect.cpp
index b73d68ebec7c..4414b04c7264 100644
--- a/llvm/lib/Transforms/Utils/MisExpect.cpp
+++ b/llvm/lib/Transforms/Utils/MisExpect.cpp
@@ -221,7 +221,7 @@ void checkBackendInstrumentation(Instruction &I,
auto ExpectedWeightsOpt = extractWeights(&I, I.getContext());
if (!ExpectedWeightsOpt)
return;
- auto ExpectedWeights = ExpectedWeightsOpt.getValue();
+ auto ExpectedWeights = ExpectedWeightsOpt.value();
verifyMisExpect(I, RealWeights, ExpectedWeights);
}
@@ -230,7 +230,7 @@ void checkFrontendInstrumentation(Instruction &I,
auto RealWeightsOpt = extractWeights(&I, I.getContext());
if (!RealWeightsOpt)
return;
- auto RealWeights = RealWeightsOpt.getValue();
+ auto RealWeights = RealWeightsOpt.value();
verifyMisExpect(I, RealWeights, ExpectedWeights);
}
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 5120ade70e16..9e1492b97a86 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -255,7 +255,7 @@ void VFABI::setVectorVariantNames(CallInst *CI,
LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
Optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
assert(VI && "Cannot add an invalid VFABI name.");
- assert(M->getNamedValue(VI.getValue().VectorName) &&
+ assert(M->getNamedValue(VI.value().VectorName) &&
"Cannot add variant to attribute: "
"vector function declaration is missing.");
}
@@ -275,5 +275,13 @@ void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
GV->setSection(SectionName);
GV->setAlignment(Alignment);
+ LLVMContext &Ctx = M.getContext();
+ NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
+ Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
+ MDString::get(Ctx, SectionName)};
+
+ MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
+ GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
+
appendToCompilerUsed(M, GV);
}
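Embedded buffers are now discoverable through the llvm.embedded.objects named metadata, and the created global carries the exclude metadata. A hedged usage sketch of the helper (section name and alignment are illustrative, and the signature is assumed from this revision):

    #include "llvm/IR/Module.h"
    #include "llvm/Support/Alignment.h"
    #include "llvm/Support/MemoryBufferRef.h"
    #include "llvm/Transforms/Utils/ModuleUtils.h"

    void embedBlob(llvm::Module &M, llvm::StringRef Data) {
      // Creates a constant global holding Data, places it in the section,
      // records it under "llvm.embedded.objects", and tags it !exclude.
      llvm::embedBufferInModule(M, llvm::MemoryBufferRef(Data, "blob"),
                                ".llvm.offloading", llvm::Align(8));
    }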
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index aff692b36288..bec1db896efb 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -488,31 +488,33 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
StoresByIndex,
std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)),
less_first());
+ Value *ReplVal;
if (I == StoresByIndex.begin()) {
if (StoresByIndex.empty())
// If there are no stores, the load takes the undef value.
- LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
+ ReplVal = UndefValue::get(LI->getType());
else
// There is no store before this load, bail out (load may be affected
// by the following stores - see main comment).
return false;
} else {
- // Otherwise, there was a store before this load, the load takes its value.
- // Note, if the load was marked as nonnull we don't want to lose that
- // information when we erase it. So we preserve it with an assume.
- Value *ReplVal = std::prev(I)->second->getOperand(0);
- if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
- !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
- addAssumeNonNull(AC, LI);
+ // Otherwise, there was a store before this load, the load takes its
+ // value.
+ ReplVal = std::prev(I)->second->getOperand(0);
+ }
- // If the replacement value is the load, this must occur in unreachable
- // code.
- if (ReplVal == LI)
- ReplVal = PoisonValue::get(LI->getType());
+ // Note, if the load was marked as nonnull we don't want to lose that
+ // information when we erase it. So we preserve it with an assume.
+ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+ !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
+ addAssumeNonNull(AC, LI);
- LI->replaceAllUsesWith(ReplVal);
- }
+ // If the replacement value is the load, this must occur in unreachable
+ // code.
+ if (ReplVal == LI)
+ ReplVal = PoisonValue::get(LI->getType());
+ LI->replaceAllUsesWith(ReplVal);
LI->eraseFromParent();
LBI.deleteValue(LI);
}
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index eee91e70292e..09a83f1ea094 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -208,8 +208,6 @@ private:
if (!Elt)
LV.markOverdefined(); // Unknown sort of constant.
- else if (isa<UndefValue>(Elt))
- ; // Undef values remain unknown.
else
LV.markConstant(Elt); // Constants are constant.
}
@@ -356,8 +354,7 @@ public:
// We only track the contents of scalar globals.
if (GV->getValueType()->isSingleValueType()) {
ValueLatticeElement &IV = TrackedGlobals[GV];
- if (!isa<UndefValue>(GV->getInitializer()))
- IV.markConstant(GV->getInitializer());
+ IV.markConstant(GV->getInitializer());
}
}
@@ -822,9 +819,6 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
if (Constant *OpC = getConstant(OpSt)) {
// Fold the constant as we build.
Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
- if (isa<UndefValue>(C))
- return;
- // Propagate constant value
markConstant(&I, C);
} else if (I.getDestTy()->isIntegerTy()) {
auto &LV = getValueState(&I);
@@ -959,19 +953,15 @@ void SCCPInstVisitor::visitUnaryOperator(Instruction &I) {
if (isOverdefined(IV))
return (void)markOverdefined(&I);
- if (isConstant(V0State)) {
- Constant *C = ConstantExpr::get(I.getOpcode(), getConstant(V0State));
-
- // op Y -> undef.
- if (isa<UndefValue>(C))
- return;
- return (void)markConstant(IV, &I, C);
- }
-
- // If something is undef, wait for it to resolve.
- if (!isOverdefined(V0State))
+ // If something is unknown/undef, wait for it to resolve.
+ if (V0State.isUnknownOrUndef())
return;
+ if (isConstant(V0State))
+ if (Constant *C = ConstantFoldUnaryOpOperand(I.getOpcode(),
+ getConstant(V0State), DL))
+ return (void)markConstant(IV, &I, C);
+
markOverdefined(&I);
}
@@ -999,9 +989,6 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
Value *R = simplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL));
auto *C = dyn_cast_or_null<Constant>(R);
if (C) {
- // X op Y -> undef.
- if (isa<UndefValue>(C))
- return;
// Conservatively assume that the result may be based on operands that may
// be undef. Note that we use mergeInValue to combine the constant with
// the existing lattice value for I, as different constants might be found
@@ -1050,6 +1037,7 @@ void SCCPInstVisitor::visitCmpInst(CmpInst &I) {
Constant *C = V1State.getCompare(I.getPredicate(), I.getType(), V2State);
if (C) {
+ // TODO: getCompare() currently has incorrect handling for unknown/undef.
if (isa<UndefValue>(C))
return;
ValueLatticeElement CV;
@@ -1095,8 +1083,6 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
Constant *C =
ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
- if (isa<UndefValue>(C))
- return;
markConstant(&I, C);
}
@@ -1174,11 +1160,8 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
}
// Transform load from a constant into a constant if possible.
- if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL)) {
- if (isa<UndefValue>(C))
- return;
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, I.getType(), DL))
return (void)markConstant(IV, &I, C);
- }
}
// Fall back to metadata.
@@ -1223,12 +1206,8 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(&CB, F, Operands, &GetTLI(*F))) {
- // call -> undef.
- if (isa<UndefValue>(C))
- return;
+ if (Constant *C = ConstantFoldCall(&CB, F, Operands, &GetTLI(*F)))
return (void)markConstant(&CB, C);
- }
}
// Fall back to metadata.
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 401f1ee5a55d..0c8bf3827256 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -220,7 +220,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
// Fold a binop with constant operands.
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantExpr::get(Opcode, CLHS, CRHS);
+ if (Constant *Res = ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, DL))
+ return Res;
// Do a quick scan to see if we have this binop nearby. If so, reuse it.
unsigned ScanLimit = 6;
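Unlike ConstantExpr::get, which always manufactures some constant expression, the DataLayout-aware ConstantFoldBinaryOpOperands may decline to fold, so the new code guards on a non-null result and otherwise falls through to emit a real instruction. A small hedged sketch of the call:

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Type.h"

    // Folds 2 + 3 to the constant 5; a null return would mean "no fold".
    llvm::Constant *foldTwoPlusThree(llvm::LLVMContext &Ctx,
                                     const llvm::DataLayout &DL) {
      auto *Ty = llvm::Type::getInt32Ty(Ctx);
      return llvm::ConstantFoldBinaryOpOperands(
          llvm::Instruction::Add, llvm::ConstantInt::get(Ty, 2),
          llvm::ConstantInt::get(Ty, 3), DL);
    }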
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 567b866f7777..4b5ade99767b 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -377,18 +377,12 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
/// expensive.
static InstructionCost computeSpeculationCost(const User *I,
const TargetTransformInfo &TTI) {
- assert(isSafeToSpeculativelyExecute(I) &&
+ assert((!isa<Instruction>(I) ||
+ isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
"Instruction is not safe to speculatively execute!");
return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
}
-/// Check whether this is a potentially trapping constant.
-static bool canTrap(const Value *V) {
- if (auto *C = dyn_cast<Constant>(V))
- return C->canTrap();
- return false;
-}
-
/// If we have a merge point of an "if condition" as accepted above,
/// return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
@@ -421,9 +415,9 @@ static bool dominatesMergePoint(Value *V, BasicBlock *BB,
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
- // Non-instructions all dominate instructions, but not all constantexprs
- // can be executed unconditionally.
- return !canTrap(V);
+ // Non-instructions dominate all instructions and can be executed
+ // unconditionally.
+ return true;
}
BasicBlock *PBB = I->getParent();
@@ -1473,10 +1467,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
while (isa<DbgInfoIntrinsic>(I2))
I2 = &*BB2_Itr++;
}
- // FIXME: Can we define a safety predicate for CallBr?
- if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
- (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) ||
- isa<CallBrInst>(I1))
+ if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2))
return false;
BasicBlock *BIParent = BI->getParent();
@@ -1609,11 +1600,6 @@ HoistTerminator:
if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
passingValueIsAlwaysUndefined(BB2V, &PN))
return Changed;
-
- if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
- return Changed;
- if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
- return Changed;
}
}
@@ -2679,9 +2665,6 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
passingValueIsAlwaysUndefined(ThenV, &PN))
return false;
- if (canTrap(OrigV) || canTrap(ThenV))
- return false;
-
HaveRewritablePHIs = true;
ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
@@ -2979,10 +2962,8 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
return true;
}
-static ConstantInt *
-getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To,
- SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>,
- ConstantInt *> &Visited) {
+static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
+ BasicBlock *To) {
// Don't look past the block defining the value, we might get the value from
// a previous loop iteration.
auto *I = dyn_cast<Instruction>(V);
@@ -2996,23 +2977,7 @@ getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To,
return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
: ConstantInt::getFalse(BI->getContext());
- // Limit the amount of blocks we inspect.
- if (Visited.size() >= 8)
- return nullptr;
-
- auto Pair = Visited.try_emplace({From, To}, nullptr);
- if (!Pair.second)
- return Pair.first->second;
-
- // Check whether the known value is the same for all predecessors.
- ConstantInt *Common = nullptr;
- for (BasicBlock *Pred : predecessors(From)) {
- ConstantInt *C = getKnownValueOnEdge(V, Pred, From, Visited);
- if (!C || (Common && Common != C))
- return nullptr;
- Common = C;
- }
- return Visited[{From, To}] = Common;
+ return nullptr;
}
/// If we have a conditional branch on something for which we know the constant
@@ -3022,7 +2987,7 @@ static Optional<bool>
FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
const DataLayout &DL,
AssumptionCache *AC) {
- SmallMapVector<BasicBlock *, ConstantInt *, 8> KnownValues;
+ SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
BasicBlock *BB = BI->getParent();
Value *Cond = BI->getCondition();
PHINode *PN = dyn_cast<PHINode>(Cond);
@@ -3035,12 +3000,11 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
for (Use &U : PN->incoming_values())
if (auto *CB = dyn_cast<ConstantInt>(U))
- KnownValues.insert({PN->getIncomingBlock(U), CB});
+ KnownValues[CB].insert(PN->getIncomingBlock(U));
} else {
- SmallDenseMap<std::pair<BasicBlock *, BasicBlock *>, ConstantInt *> Visited;
for (BasicBlock *Pred : predecessors(BB)) {
- if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB, Visited))
- KnownValues.insert({Pred, CB});
+ if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
+ KnownValues[CB].insert(Pred);
}
}
@@ -3056,29 +3020,34 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
for (const auto &Pair : KnownValues) {
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
- ConstantInt *CB = Pair.second;
- BasicBlock *PredBB = Pair.first;
+ ConstantInt *CB = Pair.first;
+ ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
if (RealDest == BB)
continue; // Skip self loops.
+
// Skip if the predecessor's terminator is an indirect branch.
- if (isa<IndirectBrInst>(PredBB->getTerminator()))
+ if (any_of(PredBBs, [](BasicBlock *PredBB) {
+ return isa<IndirectBrInst>(PredBB->getTerminator());
+ }))
continue;
- SmallVector<DominatorTree::UpdateType, 3> Updates;
+ LLVM_DEBUG({
+ dbgs() << "Condition " << *Cond << " in " << BB->getName()
+ << " has value " << *Pair.first << " in predecessors:\n";
+ for (const BasicBlock *PredBB : Pair.second)
+ dbgs() << " " << PredBB->getName() << "\n";
+ dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
+ });
+
+ // Split the predecessors we are threading into a new edge block. We'll
+ // clone the instructions into this block, and then redirect it to RealDest.
+ BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
- // The dest block might have PHI nodes, other predecessors and other
- // difficult cases. Instead of being smart about this, just insert a new
- // block that jumps to the destination block, effectively splitting
- // the edge we are about to create.
- BasicBlock *EdgeBB =
- BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
- RealDest->getParent(), RealDest);
- BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
- if (DTU)
- Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
- CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
+    // TODO: These exist only to reduce the test diff; we can drop them if we like.
+ EdgeBB->setName(RealDest->getName() + ".critedge");
+ EdgeBB->moveBefore(RealDest);
// Update PHI nodes.
AddPredecessorToBlock(RealDest, EdgeBB, BB);
@@ -3086,12 +3055,12 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
// BB may have instructions that are being threaded over. Clone these
// instructions into EdgeBB. We know that there will be no uses of the
// cloned instructions outside of EdgeBB.
- BasicBlock::iterator InsertPt = EdgeBB->begin();
+ BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
DenseMap<Value *, Value *> TranslateMap; // Track translated values.
- TranslateMap[Cond] = Pair.second;
+ TranslateMap[Cond] = CB;
for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
+ TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
continue;
}
// Clone the instruction.
@@ -3129,19 +3098,15 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
}
}
- // Loop over all of the edges from PredBB to BB, changing them to branch
- // to EdgeBB instead.
- Instruction *PredBBTI = PredBB->getTerminator();
- for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
- if (PredBBTI->getSuccessor(i) == BB) {
- BB->removePredecessor(PredBB);
- PredBBTI->setSuccessor(i, EdgeBB);
- }
+ BB->removePredecessor(EdgeBB);
+ BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
+ EdgeBI->setSuccessor(0, RealDest);
+ EdgeBI->setDebugLoc(BI->getDebugLoc());
if (DTU) {
- Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB});
- Updates.push_back({DominatorTree::Delete, PredBB, BB});
-
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
+ Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
DTU->applyUpdates(Updates);
}
@@ -3599,13 +3564,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
Cond->getParent() != BB || !Cond->hasOneUse())
return false;
- // Cond is known to be a compare or binary operator. Check to make sure that
- // neither operand is a potentially-trapping constant expression.
- if (canTrap(Cond->getOperand(0)))
- return false;
- if (canTrap(Cond->getOperand(1)))
- return false;
-
// Finally, don't infinitely unroll conditional loops.
if (is_contained(successors(BB), BB))
return false;
@@ -4113,9 +4071,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
return true;
- if (canTrap(BI->getCondition()))
- return false;
-
// If both branches are conditional and both contain stores to the same
// address, remove the stores from the conditionals and create a conditional
// merged store at the end.
@@ -4157,10 +4112,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// insertion of a large number of select instructions. For targets
// without predication/cmovs, this is a big pessimization.
- // Also do not perform this transformation if any phi node in the common
- // destination block can trap when reached by BB or PBB (PR17073). In that
- // case, it would be unsafe to hoist the operation into a select instruction.
-
BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
unsigned NumPhis = 0;
@@ -4168,16 +4119,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
++II, ++NumPhis) {
if (NumPhis > 2) // Disable this xform.
return false;
-
- PHINode *PN = cast<PHINode>(II);
- Value *BIV = PN->getIncomingValueForBlock(BB);
- if (canTrap(BIV))
- return false;
-
- unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
- Value *PBIV = PN->getIncomingValue(PBBIdx);
- if (canTrap(PBIV))
- return false;
}
// Finally, if everything is ok, fold the branches to logical ops.
@@ -6174,6 +6115,23 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
return isSwitchDense(SI->getNumCases(), TableSize);
}
+static bool ShouldUseSwitchConditionAsTableIndex(
+ ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
+ bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
+ const DataLayout &DL, const TargetTransformInfo &TTI) {
+ if (MinCaseVal.isNullValue())
+ return true;
+ if (MinCaseVal.isNegative() ||
+ MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
+ !HasDefaultResults)
+ return false;
+ return all_of(ResultTypes, [&](const auto &KV) {
+ return SwitchLookupTable::WouldFitInRegister(
+ DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
+ KV.second /* ResultType */);
+ });
+}
+
/// Try to reuse the switch table index compare. Following pattern:
/// \code
/// if (idx < tablesize)
@@ -6329,9 +6287,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
}
uint64_t NumResults = ResultLists[PHIs[0]].size();
- APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
- uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
- bool TableHasHoles = (NumResults < TableSize);
// If the table has holes, we need a constant result for the default case
// or a bitmask that fits in a register.
@@ -6340,6 +6295,22 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
DefaultResultsList, DL, TTI);
+ for (const auto &I : DefaultResultsList) {
+ PHINode *PHI = I.first;
+ Constant *Result = I.second;
+ DefaultResults[PHI] = Result;
+ }
+
+ bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
+ *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
+ uint64_t TableSize;
+ if (UseSwitchConditionAsTableIndex)
+ TableSize = MaxCaseVal->getLimitedValue() + 1;
+ else
+ TableSize =
+ (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
+
+ bool TableHasHoles = (NumResults < TableSize);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
// As an extra penalty for the validity test we require more cases.
@@ -6349,12 +6320,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
return false;
}
- for (const auto &I : DefaultResultsList) {
- PHINode *PHI = I.first;
- Constant *Result = I.second;
- DefaultResults[PHI] = Result;
- }
-
if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
return false;
@@ -6368,11 +6333,15 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Compute the table index value.
Builder.SetInsertPoint(SI);
Value *TableIndex;
- if (MinCaseVal->isNullValue())
+ ConstantInt *TableIndexOffset;
+ if (UseSwitchConditionAsTableIndex) {
+ TableIndexOffset = ConstantInt::get(MaxCaseVal->getType(), 0);
TableIndex = SI->getCondition();
- else
- TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
- "switch.tableidx");
+ } else {
+ TableIndexOffset = MinCaseVal;
+ TableIndex =
+ Builder.CreateSub(SI->getCondition(), TableIndexOffset, "switch.tableidx");
+ }
// Compute the maximum table size representable by the integer type we are
// switching upon.
@@ -6424,7 +6393,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// Build bitmask; fill in a 1 bit for every case.
const ResultListTy &ResultList = ResultLists[PHIs[0]];
for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
- uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue())
+ uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
.getLimitedValue();
MaskInt |= One << Idx;
}
@@ -6463,8 +6432,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If using a bitmask, use any value to fill the lookup table holes.
Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
StringRef FuncName = Fn->getName();
- SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
- FuncName);
+ SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
+ DL, FuncName);
Value *Result = Table.BuildLookup(TableIndex, Builder);
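ShouldUseSwitchConditionAsTableIndex trades a slightly larger table (MaxCaseVal + 1 entries rather than MaxCaseVal - MinCaseVal + 1) for eliminating the switch.tableidx subtraction, provided the default result can plug the extra holes. An illustrative source-level example with hypothetical values:

    int lookup(unsigned x) {
      switch (x) {        // MinCaseVal = 2, MaxCaseVal = 4
      case 2: return 10;
      case 3: return 11;
      case 4: return 12;
      default: return 0;  // default result can fill the table holes
      }
    }
    // Before: 3-entry table indexed by (x - 2).
    // After:  5-entry table {0, 0, 10, 11, 12} indexed by x directly;
    //         TableIndexOffset is 0, so no subtraction is emitted.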
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index dbef1ff2e739..af15e0c31b75 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -79,21 +79,23 @@ namespace {
bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
bool replaceIVUserWithLoopInvariant(Instruction *UseInst);
+ bool replaceFloatIVWithIntegerIV(Instruction *UseInst);
bool eliminateOverflowIntrinsic(WithOverflowInst *WO);
bool eliminateSaturatingIntrinsic(SaturatingInst *SI);
bool eliminateTrunc(TruncInst *TI);
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
- bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);
- void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
- void simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+ bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand);
+ void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand);
+ void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand,
bool IsSigned);
void replaceRemWithNumerator(BinaryOperator *Rem);
void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
void replaceSRemWithURem(BinaryOperator *Rem);
bool eliminateSDiv(BinaryOperator *SDiv);
- bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
- bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
+ bool strengthenOverflowingOperation(BinaryOperator *OBO,
+ Instruction *IVOperand);
+ bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand);
};
}
@@ -192,7 +194,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
}
bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
- Value *IVOperand) {
+ Instruction *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
if (IVOperand != ICmp->getOperand(0)) {
@@ -261,7 +263,8 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
/// SimplifyIVUsers helper for eliminating useless
/// comparisons against an induction variable.
-void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
+void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp,
+ Instruction *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
ICmpInst::Predicate OriginalPred = Pred;
@@ -372,7 +375,8 @@ void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {
/// SimplifyIVUsers helper for eliminating useless remainder operations
/// operating on an induction variable or replacing srem by urem.
-void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem, Value *IVOperand,
+void SimplifyIndvar::simplifyIVRemainder(BinaryOperator *Rem,
+ Instruction *IVOperand,
bool IsSigned) {
auto *NValue = Rem->getOperand(0);
auto *DValue = Rem->getOperand(1);
@@ -673,6 +677,35 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {
return true;
}
+/// Eliminate redundant type cast between integer and float.
+bool SimplifyIndvar::replaceFloatIVWithIntegerIV(Instruction *UseInst) {
+ if (UseInst->getOpcode() != CastInst::SIToFP)
+ return false;
+
+ Value *IVOperand = UseInst->getOperand(0);
+ // Get the symbolic expression for this instruction.
+ ConstantRange IVRange = SE->getSignedRange(SE->getSCEV(IVOperand));
+ unsigned DestNumSigBits = UseInst->getType()->getFPMantissaWidth();
+ if (IVRange.getActiveBits() <= DestNumSigBits) {
+ for (User *U : UseInst->users()) {
+      // Match an fptosi user of the sitofp with the same type.
+ auto *CI = dyn_cast<FPToSIInst>(U);
+ if (!CI || IVOperand->getType() != CI->getType())
+ continue;
+
+ CI->replaceAllUsesWith(IVOperand);
+ DeadInsts.push_back(CI);
+ LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *CI
+ << " with: " << *IVOperand << '\n');
+
+ ++NumFoldedUser;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
/// Eliminate any operation that SCEV can prove is an identity function.
bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
Instruction *IVOperand) {
@@ -718,18 +751,16 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
/// unsigned-overflow. Returns true if anything changed, false otherwise.
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
- Value *IVOperand) {
- SCEV::NoWrapFlags Flags;
- bool Deduced;
- std::tie(Flags, Deduced) = SE->getStrengthenedNoWrapFlagsFromBinOp(
+ Instruction *IVOperand) {
+ auto Flags = SE->getStrengthenedNoWrapFlagsFromBinOp(
cast<OverflowingBinaryOperator>(BO));
- if (!Deduced)
- return Deduced;
+ if (!Flags)
+ return false;
- BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNUW) ==
+ BO->setHasNoUnsignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNUW) ==
SCEV::FlagNUW);
- BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(Flags, SCEV::FlagNSW) ==
+ BO->setHasNoSignedWrap(ScalarEvolution::maskFlags(*Flags, SCEV::FlagNSW) ==
SCEV::FlagNSW);
// The getStrengthenedNoWrapFlagsFromBinOp() check inferred additional nowrap
@@ -737,14 +768,14 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
// forgetValue() here to make sure those flags also propagate to any other
// SCEV expressions based on the addrec. However, this can have pathological
// compile-time impact, see https://bugs.llvm.org/show_bug.cgi?id=50384.
- return Deduced;
+ return true;
}
/// Annotate the Shr in (X << IVOperand) >> C as exact using the
/// information from the IV's range. Returns true if anything changed, false
/// otherwise.
bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
- Value *IVOperand) {
+ Instruction *IVOperand) {
using namespace llvm::PatternMatch;
if (BO->getOpcode() == Instruction::Shl) {
@@ -896,6 +927,13 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
}
}
+    // Try to replace FPToSI users of a float IV with the integer IV directly.
+ if (replaceFloatIVWithIntegerIV(UseInst)) {
+ // Re-queue the potentially new direct uses of IVOperand.
+ pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
+ continue;
+ }
+
CastInst *Cast = dyn_cast<CastInst>(UseInst);
if (V && Cast) {
V->visitCast(Cast);
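The new replaceFloatIVWithIntegerIV targets sitofp/fptosi round-trips on an induction variable whose SCEV-proven signed range fits in the destination's significand. An illustrative source pattern (hypothetical; float's 24 significand bits easily cover [0, 1000)):

    void consume(int);
    void walk() {
      for (int i = 0; i < 1000; ++i) {
        float f = (float)i; // sitofp i32 -> float
        int j = (int)f;     // fptosi float -> i32, same type as i
        consume(j);         // after the fold: consume(i); both casts go dead
      }
    }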
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index f4306bb43dfd..b359717424a6 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -75,7 +75,8 @@ static bool callHasFP128Argument(const CallInst *CI) {
});
}
-static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
+static Value *convertStrToNumber(CallInst *CI, StringRef &Str, Value *EndPtr,
+ int64_t Base, IRBuilderBase &B) {
if (Base < 2 || Base > 36)
// handle special zero base
if (Base != 0)
@@ -97,6 +98,15 @@ static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) {
if (!isIntN(CI->getType()->getPrimitiveSizeInBits(), Result))
return nullptr;
+ if (EndPtr) {
+ // Store the pointer to the end.
+ uint64_t ILen = End - nptr.c_str();
+ Value *Off = B.getInt64(ILen);
+ Value *StrBeg = CI->getArgOperand(0);
+ Value *StrEnd = B.CreateInBoundsGEP(B.getInt8Ty(), StrBeg, Off, "endptr");
+ B.CreateStore(StrEnd, EndPtr);
+ }
+
return ConstantInt::get(CI->getType(), Result);
}
@@ -295,31 +305,69 @@ Value *LibCallSimplifier::optimizeStrNCat(CallInst *CI, IRBuilderBase &B) {
return copyFlags(*CI, emitStrLenMemCpy(Src, Dst, SrcLen, B));
}
+// Helper to transform memchr(S, C, N) == S to N && *S == C and, when
+// NBytes is null, strchr(S, C) to *S == C. A precondition of the function
+// is that either S is dereferenceable or the value of N is nonzero.
+static Value *memChrToCharCompare(CallInst *CI, Value *NBytes,
+                                  IRBuilderBase &B,
+                                  const DataLayout &DL) {
+ Value *Src = CI->getArgOperand(0);
+ Value *CharVal = CI->getArgOperand(1);
+
+ // Fold memchr(A, C, N) == A to N && *A == C.
+ Type *CharTy = B.getInt8Ty();
+ Value *Char0 = B.CreateLoad(CharTy, Src);
+ CharVal = B.CreateTrunc(CharVal, CharTy);
+ Value *Cmp = B.CreateICmpEQ(Char0, CharVal, "char0cmp");
+
+ if (NBytes) {
+ Value *Zero = ConstantInt::get(NBytes->getType(), 0);
+ Value *And = B.CreateICmpNE(NBytes, Zero);
+ Cmp = B.CreateLogicalAnd(And, Cmp);
+ }
+
+ Value *NullPtr = Constant::getNullValue(CI->getType());
+ return B.CreateSelect(Cmp, Src, NullPtr);
+}
+
Value *LibCallSimplifier::optimizeStrChr(CallInst *CI, IRBuilderBase &B) {
- Function *Callee = CI->getCalledFunction();
- FunctionType *FT = Callee->getFunctionType();
Value *SrcStr = CI->getArgOperand(0);
+ Value *CharVal = CI->getArgOperand(1);
annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+ return memChrToCharCompare(CI, nullptr, B, DL);
+
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
if (!CharC) {
uint64_t Len = GetStringLength(SrcStr);
if (Len)
annotateDereferenceableBytes(CI, 0, Len);
else
return nullptr;
+
+ Function *Callee = CI->getCalledFunction();
+ FunctionType *FT = Callee->getFunctionType();
if (!FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
return nullptr;
return copyFlags(
*CI,
- emitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+ emitMemChr(SrcStr, CharVal, // include nul.
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), B,
DL, TLI));
}
+ if (CharC->isZero()) {
+ Value *NullPtr = Constant::getNullValue(CI->getType());
+ if (isOnlyUsedInEqualityComparison(CI, NullPtr))
+ // Pre-empt the transformation to strlen below and fold
+ // strchr(A, '\0') == null to false.
+ return B.CreateIntToPtr(B.getTrue(), CI->getType());
+ }
+
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
StringRef Str;
@@ -1008,8 +1056,12 @@ Value *LibCallSimplifier::optimizeMemRChr(CallInst *CI, IRBuilderBase &B) {
Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
Value *SrcStr = CI->getArgOperand(0);
Value *Size = CI->getArgOperand(2);
- if (isKnownNonZero(Size, DL))
+
+ if (isKnownNonZero(Size, DL)) {
annotateNonNullNoUndefBasedOnAccess(CI, 0);
+ if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+ return memChrToCharCompare(CI, Size, B, DL);
+ }
Value *CharVal = CI->getArgOperand(1);
ConstantInt *CharC = dyn_cast<ConstantInt>(CharVal);
@@ -1099,9 +1151,16 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
return B.CreateSelect(And, SrcStr, Sel1, "memchr.sel2");
}
- if (!LenC)
+ if (!LenC) {
+ if (isOnlyUsedInEqualityComparison(CI, SrcStr))
+ // S is dereferenceable so it's safe to load from it and fold
+ // memchr(S, C, N) == S to N && *S == C for any C and N.
+      // TODO: This is safe even for nonconstant S.
+ return memChrToCharCompare(CI, Size, B, DL);
+
// From now on we need a constant length and constant array.
return nullptr;
+ }
// If the char is variable but the input str and length are not we can turn
// this memchr call into a simple bit field test. Of course this only works
@@ -1589,31 +1648,6 @@ static Value *optimizeTrigReflections(CallInst *Call, LibFunc Func,
return nullptr;
}
-static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilderBase &B) {
- // Multiplications calculated using Addition Chains.
- // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html
-
- assert(Exp != 0 && "Incorrect exponent 0 not handled");
-
- if (InnerChain[Exp])
- return InnerChain[Exp];
-
- static const unsigned AddChain[33][2] = {
- {0, 0}, // Unused.
- {0, 0}, // Unused (base case = pow1).
- {1, 1}, // Unused (pre-computed).
- {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4},
- {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7},
- {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10},
- {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13},
- {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16},
- };
-
- InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B),
- getPow(InnerChain, AddChain[Exp][1], B));
- return InnerChain[Exp];
-}
-
// Return a properly extended integer (DstWidth bits wide) if the operation is
// an itofp.
static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
@@ -1914,70 +1948,52 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
if (Value *Sqrt = replacePowWithSqrt(Pow, B))
return Sqrt;
- // pow(x, n) -> x * x * x * ...
+ // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction
const APFloat *ExpoF;
- if (AllowApprox && match(Expo, m_APFloat(ExpoF)) &&
- !ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)) {
- // We limit to a max of 7 multiplications, thus the maximum exponent is 32.
- // If the exponent is an integer+0.5 we generate a call to sqrt and an
- // additional fmul.
- // TODO: This whole transformation should be backend specific (e.g. some
- // backends might prefer libcalls or the limit for the exponent might
- // be different) and it should also consider optimizing for size.
- APFloat LimF(ExpoF->getSemantics(), 33),
- ExpoA(abs(*ExpoF));
- if (ExpoA < LimF) {
- // This transformation applies to integer or integer+0.5 exponents only.
- // For integer+0.5, we create a sqrt(Base) call.
- Value *Sqrt = nullptr;
- if (!ExpoA.isInteger()) {
- APFloat Expo2 = ExpoA;
- // To check if ExpoA is an integer + 0.5, we add it to itself. If there
- // is no floating point exception and the result is an integer, then
- // ExpoA == integer + 0.5
- if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
- return nullptr;
-
- if (!Expo2.isInteger())
- return nullptr;
-
- Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
- Pow->doesNotAccessMemory(), M, B, TLI);
- if (!Sqrt)
- return nullptr;
- }
-
- // We will memoize intermediate products of the Addition Chain.
- Value *InnerChain[33] = {nullptr};
- InnerChain[1] = Base;
- InnerChain[2] = B.CreateFMul(Base, Base, "square");
-
- // We cannot readily convert a non-double type (like float) to a double.
- // So we first convert it to something which could be converted to double.
- ExpoA.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &Ignored);
- Value *FMul = getPow(InnerChain, ExpoA.convertToDouble(), B);
+ if (match(Expo, m_APFloat(ExpoF)) && !ExpoF->isExactlyValue(0.5) &&
+ !ExpoF->isExactlyValue(-0.5)) {
+ APFloat ExpoA(abs(*ExpoF));
+ APFloat ExpoI(*ExpoF);
+ Value *Sqrt = nullptr;
+ if (AllowApprox && !ExpoA.isInteger()) {
+ APFloat Expo2 = ExpoA;
+ // To check if ExpoA is an integer + 0.5, we add it to itself. If there
+ // is no floating point exception and the result is an integer, then
+ // ExpoA == integer + 0.5
+ if (Expo2.add(ExpoA, APFloat::rmNearestTiesToEven) != APFloat::opOK)
+ return nullptr;
- // Expand pow(x, y+0.5) to pow(x, y) * sqrt(x).
- if (Sqrt)
- FMul = B.CreateFMul(FMul, Sqrt);
+ if (!Expo2.isInteger())
+ return nullptr;
- // If the exponent is negative, then get the reciprocal.
- if (ExpoF->isNegative())
- FMul = B.CreateFDiv(ConstantFP::get(Ty, 1.0), FMul, "reciprocal");
+ if (ExpoI.roundToIntegral(APFloat::rmTowardNegative) !=
+ APFloat::opInexact)
+ return nullptr;
+ if (!ExpoI.isInteger())
+ return nullptr;
+ ExpoF = &ExpoI;
- return FMul;
+ Sqrt = getSqrtCall(Base, Pow->getCalledFunction()->getAttributes(),
+ Pow->doesNotAccessMemory(), M, B, TLI);
+ if (!Sqrt)
+ return nullptr;
}
+ // pow(x, n) -> powi(x, n) if n is a constant signed integer value
APSInt IntExpo(TLI->getIntSize(), /*isUnsigned=*/false);
- // powf(x, n) -> powi(x, n) if n is a constant signed integer value
if (ExpoF->isInteger() &&
ExpoF->convertToInteger(IntExpo, APFloat::rmTowardZero, &Ignored) ==
APFloat::opOK) {
- return copyFlags(
+ Value *PowI = copyFlags(
*Pow,
createPowWithIntegerExponent(
Base, ConstantInt::get(B.getIntNTy(TLI->getIntSize()), IntExpo),
M, B));
+
+ if (PowI && Sqrt)
+ return B.CreateFMul(PowI, Sqrt);
+
+ return PowI;
}
}
@@ -2517,7 +2533,7 @@ Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilderBase &B) {
if (!getConstantStringInfo(CI->getArgOperand(0), Str))
return nullptr;
- return convertStrToNumber(CI, Str, 10);
+ return convertStrToNumber(CI, Str, nullptr, 10, B);
}
Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilderBase &B) {
@@ -2525,11 +2541,14 @@ Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilderBase &B) {
if (!getConstantStringInfo(CI->getArgOperand(0), Str))
return nullptr;
- if (!isa<ConstantPointerNull>(CI->getArgOperand(1)))
+ Value *EndPtr = CI->getArgOperand(1);
+ if (isa<ConstantPointerNull>(EndPtr))
+ EndPtr = nullptr;
+ else if (!isKnownNonZero(EndPtr, DL))
return nullptr;
if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
- return convertStrToNumber(CI, Str, CInt->getSExtValue());
+ return convertStrToNumber(CI, Str, EndPtr, CInt->getSExtValue(), B);
}
return nullptr;
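The reworked pow folding composes an integer-exponent powi with at most one sqrt when the constant exponent carries an exact .5 fraction; the fractional path still requires approximate math to be allowed, while plain integer exponents convert directly. Conceptually:

    // pow(x,  3.5) ==> powi(x,  3) * sqrt(x)  // 3.5 rounds toward -inf to 3
    // pow(x, -3.5) ==> powi(x, -4) * sqrt(x)  // x^-4 * x^0.5 == x^-3.5
    // pow(x,  4.0) ==> powi(x,  4)            // integer exponent, no sqrt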
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 6242d9a93fc1..183ba86abcb4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -386,20 +386,6 @@ static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
return true;
}
-/// Check whether it is safe to if-convert this phi node.
-///
-/// Phi nodes with constant expressions that can trap are not safe to if
-/// convert.
-static bool canIfConvertPHINodes(BasicBlock *BB) {
- for (PHINode &Phi : BB->phis()) {
- for (Value *V : Phi.incoming_values())
- if (auto *C = dyn_cast<Constant>(V))
- if (C->canTrap())
- return false;
- }
- return true;
-}
-
static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
if (Ty->isPointerTy())
return DL.getIntPtrType(Ty);
@@ -993,7 +979,6 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
}
- Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
PSE.addPredicate(LAI->getPSE().getPredicate());
return true;
}
@@ -1098,13 +1083,6 @@ bool LoopVectorizationLegality::blockCanBePredicated(
SmallPtrSetImpl<const Instruction *> &MaskedOp,
SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
for (Instruction &I : *BB) {
- // Check that we don't have a constant expression that can trap as operand.
- for (Value *Operand : I.operands()) {
- if (auto *C = dyn_cast<Constant>(Operand))
- if (C->canTrap())
- return false;
- }
-
// We can predicate blocks with calls to assume, as long as we drop them in
// case we flatten the CFG via predication.
if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
@@ -1190,7 +1168,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
}
// Collect the blocks that need predication.
- BasicBlock *Header = TheLoop->getHeader();
for (BasicBlock *BB : TheLoop->blocks()) {
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator())) {
@@ -1212,13 +1189,6 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
BB->getTerminator());
return false;
}
- } else if (BB != Header && !canIfConvertPHINodes(BB)) {
- reportVectorizationFailure(
- "Control flow cannot be substituted for a select",
- "control flow cannot be substituted for a select",
- "NoCFGForSelect", ORE, TheLoop,
- BB->getTerminator());
- return false;
}
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 0cb2032fa45a..2e9a9fe0640e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -33,7 +33,6 @@ class LoopInfo;
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class PredicatedScalarEvolution;
-class LoopVectorizationRequirements;
class LoopVectorizeHints;
class OptimizationRemarkEmitter;
class TargetTransformInfo;
@@ -46,8 +45,9 @@ class VPBuilder {
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
VPInstruction *createInstruction(unsigned Opcode,
- ArrayRef<VPValue *> Operands, DebugLoc DL) {
- VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL);
+ ArrayRef<VPValue *> Operands, DebugLoc DL,
+ const Twine &Name = "") {
+ VPInstruction *Instr = new VPInstruction(Opcode, Operands, DL, Name);
if (BB)
BB->insert(Instr, InsertPt);
return Instr;
@@ -55,8 +55,8 @@ class VPBuilder {
VPInstruction *createInstruction(unsigned Opcode,
std::initializer_list<VPValue *> Operands,
- DebugLoc DL) {
- return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL);
+ DebugLoc DL, const Twine &Name = "") {
+ return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name);
}
public:
@@ -124,34 +124,37 @@ public:
/// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
/// its underlying Instruction.
VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
- Instruction *Inst = nullptr) {
+ Instruction *Inst = nullptr, const Twine &Name = "") {
DebugLoc DL;
if (Inst)
DL = Inst->getDebugLoc();
- VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL);
+ VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name);
NewVPInst->setUnderlyingValue(Inst);
return NewVPInst;
}
VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
- DebugLoc DL) {
- return createInstruction(Opcode, Operands, DL);
+ DebugLoc DL, const Twine &Name = "") {
+ return createInstruction(Opcode, Operands, DL, Name);
}
- VPValue *createNot(VPValue *Operand, DebugLoc DL) {
- return createInstruction(VPInstruction::Not, {Operand}, DL);
+ VPValue *createNot(VPValue *Operand, DebugLoc DL, const Twine &Name = "") {
+ return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
}
- VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL) {
- return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL);
+ VPValue *createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL,
+ const Twine &Name = "") {
+ return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
}
- VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL) {
- return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL);
+ VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL,
+ const Twine &Name = "") {
+ return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name);
}
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
- DebugLoc DL) {
- return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL);
+ DebugLoc DL, const Twine &Name = "") {
+ return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL,
+ Name);
}
//===--------------------------------------------------------------------===//
@@ -191,6 +194,10 @@ struct VectorizationFactor {
/// Cost of the scalar loop.
InstructionCost ScalarCost;
+ /// The minimum trip count required to make vectorization profitable, e.g. due
+ /// to runtime checks.
+ ElementCount MinProfitableTripCount;
+
VectorizationFactor(ElementCount Width, InstructionCost Cost,
InstructionCost ScalarCost)
: Width(Width), Cost(Cost), ScalarCost(ScalarCost) {}
@@ -268,8 +275,6 @@ class LoopVectorizationPlanner {
const LoopVectorizeHints &Hints;
- LoopVectorizationRequirements &Requirements;
-
OptimizationRemarkEmitter *ORE;
SmallVector<VPlanPtr, 4> VPlans;
@@ -285,10 +290,9 @@ public:
InterleavedAccessInfo &IAI,
PredicatedScalarEvolution &PSE,
const LoopVectorizeHints &Hints,
- LoopVectorizationRequirements &Requirements,
OptimizationRemarkEmitter *ORE)
: OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
- PSE(PSE), Hints(Hints), Requirements(Requirements), ORE(ORE) {}
+ PSE(PSE), Hints(Hints), ORE(ORE) {}
/// Plan how to best vectorize, return the best VF and its cost, or None if
/// vectorization and interleaving should be avoided up front.
@@ -332,11 +336,6 @@ public:
bool requiresTooManyRuntimeChecks() const;
protected:
- /// Collect the instructions from the original loop that would be trivially
- /// dead in the vectorized loop if generated.
- void collectTriviallyDeadInstructions(
- SmallPtrSetImpl<Instruction *> &DeadInstructions);
-
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
/// legal to vectorize the loop.
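The added Twine parameters let recipe-construction code propagate meaningful names into VPInstruction printing. A fragment-level sketch (assuming a VPBuilder Builder, VPValues Cond, A, and B, and a DebugLoc DL in scope; the names are our own):

    VPValue *NotCond = Builder.createNot(Cond, DL, "not.cond");
    VPValue *Mask = Builder.createAnd(NotCond, A, DL, "mask");
    VPValue *Sel = Builder.createSelect(Mask, A, B, DL, "select");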
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b637b2d5ddae..0777a1385916 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -196,10 +196,9 @@ static cl::opt<unsigned> TinyTripCountVectorThreshold(
"value are vectorized only if no scalar iteration overheads "
"are incurred."));
-static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
- "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
- cl::desc("The maximum allowed number of runtime memory checks with a "
- "vectorize(enable) pragma."));
+static cl::opt<unsigned> VectorizeMemoryCheckThreshold(
+ "vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
+ cl::desc("The maximum allowed number of runtime memory checks"));
// Option prefer-predicate-over-epilogue indicates that an epilogue is undesired,
// that predication is preferred, and this lists all options. I.e., the
@@ -442,6 +441,7 @@ public:
const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, ElementCount VecWidth,
+ ElementCount MinProfitableTripCount,
unsigned UnrollFactor, LoopVectorizationLegality *LVL,
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks)
@@ -453,6 +453,11 @@ public:
// of the original loop header may change as the transformation happens.
OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize(
OrigLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
+
+ if (MinProfitableTripCount.isZero())
+ this->MinProfitableTripCount = VecWidth;
+ else
+ this->MinProfitableTripCount = MinProfitableTripCount;
}
virtual ~InnerLoopVectorizer() = default;
@@ -656,6 +661,8 @@ protected:
/// vector elements.
ElementCount VF;
+ ElementCount MinProfitableTripCount;
+
/// The vectorization unroll factor to use. Each scalar is vectorized to this
/// many different vector instructions.
unsigned UF;
@@ -735,6 +742,7 @@ public:
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, GeneratedRTChecks &Check)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
+ ElementCount::getFixed(1),
ElementCount::getFixed(1), UnrollFactor, LVL, CM,
BFI, PSI, Check) {}
@@ -783,8 +791,8 @@ public:
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &Checks)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI.MainLoopVF, EPI.MainLoopUF, LVL, CM, BFI, PSI,
- Checks),
+ EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL,
+ CM, BFI, PSI, Checks),
EPI(EPI) {}
// Override this function to handle the more complex control flow around the
@@ -1018,7 +1026,8 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
if (isa<VPWidenMemoryInstructionRecipe>(CurRec) ||
isa<VPInterleaveRecipe>(CurRec) ||
isa<VPScalarIVStepsRecipe>(CurRec) ||
- isa<VPCanonicalIVPHIRecipe>(CurRec))
+ isa<VPCanonicalIVPHIRecipe>(CurRec) ||
+ isa<VPActiveLaneMaskPHIRecipe>(CurRec))
continue;
// This recipe contributes to the address computation of a widen
@@ -1503,6 +1512,13 @@ public:
/// Returns true if all loop blocks should be masked to fold tail loop.
bool foldTailByMasking() const { return FoldTailByMasking; }
+  /// Returns true if we're tail-folding and want to use the active lane mask
+ /// for vector loop control flow.
+ bool useActiveLaneMaskForControlFlow() const {
+ return FoldTailByMasking &&
+ TTI.emitGetActiveLaneMask() == PredicationStyle::DataAndControlFlow;
+ }
+
/// Returns true if the instructions in this block requires predication
/// for any reason, e.g. because tail folding now requires a predicate
/// or because the block in the original loop was predicated.
@@ -1551,14 +1567,14 @@ public:
Scalars.clear();
}
-private:
- unsigned NumPredStores = 0;
-
/// Convenience function that returns the value of vscale_range iff
/// vscale_range.min == vscale_range.max or otherwise returns the value
/// returned by the corresponding TLI method.
Optional<unsigned> getVScaleForTuning() const;
+private:
+ unsigned NumPredStores = 0;
+
/// \return An upper bound for the vectorization factors for both
/// fixed and scalable vectorization, where the minimum-known number of
/// elements is a power-of-2 larger than zero. If scalable vectorization is
@@ -1661,7 +1677,8 @@ private:
/// A set containing all BasicBlocks that are known to present after
/// vectorization as a predicated block.
- SmallPtrSet<BasicBlock *, 4> PredicatedBBsAfterVectorization;
+ DenseMap<ElementCount, SmallPtrSet<BasicBlock *, 4>>
+ PredicatedBBsAfterVectorization;
/// Records whether it is allowed to have the original scalar loop execute at
/// least once. This may be needed as a fallback loop in case runtime
@@ -1849,14 +1866,17 @@ class GeneratedRTChecks {
DominatorTree *DT;
LoopInfo *LI;
+ TargetTransformInfo *TTI;
SCEVExpander SCEVExp;
SCEVExpander MemCheckExp;
+ bool CostTooHigh = false;
+
public:
GeneratedRTChecks(ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI,
- const DataLayout &DL)
- : DT(DT), LI(LI), SCEVExp(SE, DL, "scev.check"),
+ TargetTransformInfo *TTI, const DataLayout &DL)
+ : DT(DT), LI(LI), TTI(TTI), SCEVExp(SE, DL, "scev.check"),
MemCheckExp(SE, DL, "scev.check") {}
/// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can
@@ -1867,6 +1887,15 @@ public:
void Create(Loop *L, const LoopAccessInfo &LAI,
const SCEVPredicate &UnionPred, ElementCount VF, unsigned IC) {
+ // Hard cutoff to limit compile-time increase in case a very large number of
+ // runtime checks needs to be generated.
+ // TODO: Skip cutoff if the loop is guaranteed to execute, e.g. due to
+ // profile info.
+ CostTooHigh =
+ LAI.getNumRuntimePointerChecks() > VectorizeMemoryCheckThreshold;
+ if (CostTooHigh)
+ return;
+
BasicBlock *LoopHeader = L->getHeader();
BasicBlock *Preheader = L->getLoopPreheader();
@@ -1938,6 +1967,44 @@ public:
}
}
+ InstructionCost getCost() {
+ if (SCEVCheckBlock || MemCheckBlock)
+ LLVM_DEBUG(dbgs() << "Calculating cost of runtime checks:\n");
+
+ if (CostTooHigh) {
+ InstructionCost Cost;
+ Cost.setInvalid();
+ LLVM_DEBUG(dbgs() << " number of checks exceeded threshold\n");
+ return Cost;
+ }
+
+ InstructionCost RTCheckCost = 0;
+ if (SCEVCheckBlock)
+ for (Instruction &I : *SCEVCheckBlock) {
+ if (SCEVCheckBlock->getTerminator() == &I)
+ continue;
+ InstructionCost C =
+ TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput);
+ LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
+ RTCheckCost += C;
+ }
+ if (MemCheckBlock)
+ for (Instruction &I : *MemCheckBlock) {
+ if (MemCheckBlock->getTerminator() == &I)
+ continue;
+ InstructionCost C =
+ TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput);
+ LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n");
+ RTCheckCost += C;
+ }
+
+ if (SCEVCheckBlock || MemCheckBlock)
+ LLVM_DEBUG(dbgs() << "Total cost of runtime checks: " << RTCheckCost
+ << "\n");
+
+ return RTCheckCost;
+ }
+
/// Remove the created SCEV & memory runtime check blocks & instructions, if
/// unused.
~GeneratedRTChecks() {
@@ -2880,9 +2947,16 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
// If tail is to be folded, vector loop takes care of all iterations.
Type *CountTy = Count->getType();
Value *CheckMinIters = Builder.getFalse();
- Value *Step = createStepForVF(Builder, CountTy, VF, UF);
+ auto CreateStep = [&]() {
+    // Create step with max(MinProfitableTripCount, UF * VF).
+ if (UF * VF.getKnownMinValue() < MinProfitableTripCount.getKnownMinValue())
+ return createStepForVF(Builder, CountTy, MinProfitableTripCount, 1);
+ return createStepForVF(Builder, CountTy, VF, UF);
+ };
+
if (!Cost->foldTailByMasking())
- CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
+ CheckMinIters =
+ Builder.CreateICmp(P, Count, CreateStep(), "min.iters.check");
else if (VF.isScalable()) {
// vscale is not necessarily a power-of-2, which means we cannot guarantee
// an overflow to zero when updating induction variables and so an
@@ -2894,8 +2968,9 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
Value *LHS = Builder.CreateSub(MaxUIntTripCount, Count);
// Don't execute the vector loop if (UMax - n) < (VF * UF).
- CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, Step);
+ CheckMinIters = Builder.CreateICmp(ICmpInst::ICMP_ULT, LHS, CreateStep());
}
+
// Create new preheader for vector loop.
LoopVectorPreHeader =
SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(), DT, LI, nullptr,
@@ -2920,7 +2995,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
}
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
-
BasicBlock *const SCEVCheckBlock =
RTChecks.emitSCEVChecks(Bypass, LoopVectorPreHeader, LoopExitBlock);
if (!SCEVCheckBlock)
@@ -4792,7 +4866,7 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
MaxVScale =
TheFunction->getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
MaxScalableVF = ElementCount::getScalable(
- MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
+ MaxVScale ? (MaxSafeElements / MaxVScale.value()) : 0);
if (!MaxScalableVF)
reportVectorizationInfo(
"Max legal vector width too small, scalable vectorization "
@@ -5187,9 +5261,9 @@ bool LoopVectorizationCostModel::isMoreProfitable(
unsigned EstimatedWidthB = B.Width.getKnownMinValue();
if (Optional<unsigned> VScale = getVScaleForTuning()) {
if (A.Width.isScalable())
- EstimatedWidthA *= VScale.getValue();
+ EstimatedWidthA *= VScale.value();
if (B.Width.isScalable())
- EstimatedWidthB *= VScale.getValue();
+ EstimatedWidthB *= VScale.value();
}
// Assume vscale may be larger than 1 (or the value being tuned for),
@@ -5872,10 +5946,11 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
- auto GetRegUsage = [&TTI = TTI](Type *Ty, ElementCount VF) -> unsigned {
+ const auto &TTICapture = TTI;
+ auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty))
return 0;
- return TTI.getRegUsageForType(VectorType::get(Ty, VF));
+ return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
};
for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {
@@ -6014,6 +6089,8 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
// map will indicate that we've analyzed it already.
ScalarCostsTy &ScalarCostsVF = InstsToScalarize[VF];
+ PredicatedBBsAfterVectorization[VF].clear();
+
// Find all the instructions that are scalar with predication in the loop and
// determine if it would be better to not if-convert the blocks they are in.
// If so, we also record the instructions to scalarize.
@@ -6031,7 +6108,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
// Remember that BB will remain after vectorization.
- PredicatedBBsAfterVectorization.insert(BB);
+ PredicatedBBsAfterVectorization[VF].insert(BB);
}
}
}
@@ -6896,8 +6973,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
bool ScalarPredicatedBB = false;
BranchInst *BI = cast<BranchInst>(I);
if (VF.isVector() && BI->isConditional() &&
- (PredicatedBBsAfterVectorization.count(BI->getSuccessor(0)) ||
- PredicatedBBsAfterVectorization.count(BI->getSuccessor(1))))
+ (PredicatedBBsAfterVectorization[VF].count(BI->getSuccessor(0)) ||
+ PredicatedBBsAfterVectorization[VF].count(BI->getSuccessor(1))))
ScalarPredicatedBB = true;
if (ScalarPredicatedBB) {
@@ -7363,14 +7440,6 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
return VectorizationFactor::Disabled();
}
-bool LoopVectorizationPlanner::requiresTooManyRuntimeChecks() const {
- unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
- return (NumRuntimePointerChecks >
- VectorizerParams::RuntimeMemoryCheckThreshold &&
- !Hints.allowReordering()) ||
- NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
-}
-
Optional<VectorizationFactor>
LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
@@ -7439,7 +7508,9 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VectorizationFactor::Disabled();
// Select the optimal vectorization factor.
- return CM.selectVectorizationFactor(VFCandidates);
+ VectorizationFactor VF = CM.selectVectorizationFactor(VFCandidates);
+  assert((VF.Width.isScalar() || VF.ScalarCost > 0) &&
+         "when vectorizing, the scalar cost must be non-zero.");
+ return VF;
}
VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
@@ -7554,7 +7625,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
if (VectorizedLoopID)
- L->setLoopID(VectorizedLoopID.getValue());
+ L->setLoopID(VectorizedLoopID.value());
else {
// Keep all loop hints from the original loop on the vector loop (we'll
// replace the vectorizer-specific hints below).
@@ -7585,51 +7656,6 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
}
#endif
-void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
- SmallPtrSetImpl<Instruction *> &DeadInstructions) {
-
- // We create new control-flow for the vectorized loop, so the original exit
- // conditions will be dead after vectorization if it's only used by the
- // terminator
- SmallVector<BasicBlock*> ExitingBlocks;
- OrigLoop->getExitingBlocks(ExitingBlocks);
- for (auto *BB : ExitingBlocks) {
- auto *Cmp = dyn_cast<Instruction>(BB->getTerminator()->getOperand(0));
- if (!Cmp || !Cmp->hasOneUse())
- continue;
-
- // TODO: we should introduce a getUniqueExitingBlocks on Loop
- if (!DeadInstructions.insert(Cmp).second)
- continue;
-
- // The operands of the icmp is often a dead trunc, used by IndUpdate.
- // TODO: can recurse through operands in general
- for (Value *Op : Cmp->operands()) {
- if (isa<TruncInst>(Op) && Op->hasOneUse())
- DeadInstructions.insert(cast<Instruction>(Op));
- }
- }
-
- // We create new "steps" for induction variable updates to which the original
- // induction variables map. An original update instruction will be dead if
- // all its users except the induction variable are dead.
- auto *Latch = OrigLoop->getLoopLatch();
- for (auto &Induction : Legal->getInductionVars()) {
- PHINode *Ind = Induction.first;
- auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
-
- // If the tail is to be folded by masking, the primary induction variable,
- // if exists, isn't dead: it will be used for masking. Don't kill it.
- if (CM.foldTailByMasking() && IndUpdate == Legal->getPrimaryInduction())
- continue;
-
- if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
- return U == Ind || DeadInstructions.count(cast<Instruction>(U));
- }))
- DeadInstructions.insert(IndUpdate);
- }
-}
-
Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
//===--------------------------------------------------------------------===//
@@ -8001,11 +8027,19 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
if (!CM.blockNeedsPredicationForAnyReason(BB))
return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
+ assert(CM.foldTailByMasking() && "must fold the tail");
+
+ // If we're using the active lane mask for control flow, then we get the
+ // mask from the active lane mask PHI that is cached in the VPlan.
+ PredicationStyle EmitGetActiveLaneMask = CM.TTI.emitGetActiveLaneMask();
+ if (EmitGetActiveLaneMask == PredicationStyle::DataAndControlFlow)
+ return BlockMaskCache[BB] = Plan->getActiveLaneMaskPhi();
+
// Introduce the early-exit compare IV <= BTC to form header block mask.
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
// constructing the desired canonical IV in the header block as its first
// non-phi instructions.
- assert(CM.foldTailByMasking() && "must fold the tail");
+
VPBasicBlock *HeaderVPBB =
Plan->getVectorLoopRegion()->getEntryBasicBlock();
auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
@@ -8014,9 +8048,10 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
- if (CM.TTI.emitGetActiveLaneMask()) {
+ if (EmitGetActiveLaneMask != PredicationStyle::None) {
VPValue *TC = Plan->getOrCreateTripCount();
- BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC});
+ BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC},
+ nullptr, "active.lane.mask");
} else {
VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC});
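
The wrap-around subtlety called out in the comment above is easiest to see with a narrow IV. A minimal sketch, assuming an 8-bit induction variable and a trip count of 256, so TC truncates to 0 while BTC = 255 stays representable:

#include <cassert>
#include <cstdint>

int main() {
  // Suppose a loop runs 256 times with an 8-bit induction variable.
  uint8_t TC = 0;    // 256 truncated to 8 bits wraps to 0.
  uint8_t BTC = 255; // The backedge-taken count 255 is representable.

  for (unsigned IV = 0; IV < 256; ++IV) {
    bool LtTC = (uint8_t)IV < TC;    // always false: the mask is wrong
    bool LeBTC = (uint8_t)IV <= BTC; // always true: the mask is right
    assert(!LtTC && LeBTC);
  }
}
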
@@ -8409,9 +8444,8 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
return RegSucc;
}
-VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
- VPRecipeBase *PredRecipe,
- VPlanPtr &Plan) {
+VPRegionBlock *VPRecipeBuilder::createReplicateRegion(
+ Instruction *Instr, VPReplicateRecipe *PredRecipe, VPlanPtr &Plan) {
// Instructions marked for predication are replicated and placed under an
// if-then construct to prevent side-effects.
@@ -8425,7 +8459,7 @@ VPRegionBlock *VPRecipeBuilder::createReplicateRegion(Instruction *Instr,
auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);
auto *PHIRecipe = Instr->getType()->isVoidTy()
? nullptr
- : new VPPredInstPHIRecipe(Plan->getOrAddVPValue(Instr));
+ : new VPPredInstPHIRecipe(PredRecipe);
if (PHIRecipe) {
Plan->removeVPValueFor(Instr);
Plan->addVPValue(Instr, PHIRecipe);
@@ -8517,19 +8551,11 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
ElementCount MaxVF) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
- // Collect instructions from the original loop that will become trivially dead
- // in the vectorized loop. We don't need to vectorize these instructions. For
- // example, original induction update instructions can become dead because we
- // separately emit induction "steps" when generating code for the new loop.
- // Similarly, we create a new latch condition when setting up the structure
- // of the new loop, so the old one can become dead.
- SmallPtrSet<Instruction *, 4> DeadInstructions;
- collectTriviallyDeadInstructions(DeadInstructions);
-
// Add assume instructions we need to drop to DeadInstructions, to prevent
// them from being added to the VPlan.
  // TODO: We only need to drop assumes in blocks that get flattened. If the
// control flow is preserved, we should keep them.
+ SmallPtrSet<Instruction *, 4> DeadInstructions;
auto &ConditionalAssumes = Legal->getConditionalAssumes();
DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end());
@@ -8565,32 +8591,84 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
}
}
-// Add a VPCanonicalIVPHIRecipe starting at 0 to the header, a
-// CanonicalIVIncrement{NUW} VPInstruction to increment it by VF * UF and a
-// BranchOnCount VPInstruction to the latch.
+// Add the canonical IV and branch recipes required to control the loop.
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
- bool HasNUW) {
+ bool HasNUW,
+ bool UseLaneMaskForLoopControlFlow) {
Value *StartIdx = ConstantInt::get(IdxTy, 0);
auto *StartV = Plan.getOrAddVPValue(StartIdx);
+ // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
VPBasicBlock *Header = TopRegion->getEntryBasicBlock();
Header->insert(CanonicalIVPHI, Header->begin());
+ // Add a CanonicalIVIncrement{NUW} VPInstruction to increment the scalar
+ // IV by VF * UF.
auto *CanonicalIVIncrement =
new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementNUW
: VPInstruction::CanonicalIVIncrement,
- {CanonicalIVPHI}, DL);
+ {CanonicalIVPHI}, DL, "index.next");
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
EB->appendRecipe(CanonicalIVIncrement);
- auto *BranchOnCount =
- new VPInstruction(VPInstruction::BranchOnCount,
- {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
- EB->appendRecipe(BranchOnCount);
+ if (UseLaneMaskForLoopControlFlow) {
+ // Create the active lane mask instruction in the VPlan preheader.
+ VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
+
+ // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
+ // we have to take unrolling into account. Each part needs to start at
+ // Part * VF.
+ auto *CanonicalIVIncrementParts =
+ new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementForPartNUW
+ : VPInstruction::CanonicalIVIncrementForPart,
+ {StartV}, DL, "index.part.next");
+ Preheader->appendRecipe(CanonicalIVIncrementParts);
+
+ // Create the ActiveLaneMask instruction using the correct start values.
+ VPValue *TC = Plan.getOrCreateTripCount();
+ auto *EntryALM = new VPInstruction(VPInstruction::ActiveLaneMask,
+ {CanonicalIVIncrementParts, TC}, DL,
+ "active.lane.mask.entry");
+ Preheader->appendRecipe(EntryALM);
+
+ // Now create the ActiveLaneMaskPhi recipe in the main loop using the
+ // preheader ActiveLaneMask instruction.
+ auto *LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc());
+ Header->insert(LaneMaskPhi, Header->getFirstNonPhi());
+
+ // Create the active lane mask for the next iteration of the loop.
+ CanonicalIVIncrementParts =
+ new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementForPartNUW
+ : VPInstruction::CanonicalIVIncrementForPart,
+ {CanonicalIVIncrement}, DL);
+ EB->appendRecipe(CanonicalIVIncrementParts);
+
+ auto *ALM = new VPInstruction(VPInstruction::ActiveLaneMask,
+ {CanonicalIVIncrementParts, TC}, DL,
+ "active.lane.mask.next");
+ EB->appendRecipe(ALM);
+ LaneMaskPhi->addOperand(ALM);
+
+ // We have to invert the mask here because a true condition means jumping
+ // to the exit block.
+ auto *NotMask = new VPInstruction(VPInstruction::Not, ALM, DL);
+ EB->appendRecipe(NotMask);
+
+ VPInstruction *BranchBack =
+ new VPInstruction(VPInstruction::BranchOnCond, {NotMask}, DL);
+ EB->appendRecipe(BranchBack);
+ } else {
+ // Add the BranchOnCount VPInstruction to the latch.
+ VPInstruction *BranchBack = new VPInstruction(
+ VPInstruction::BranchOnCount,
+ {CanonicalIVIncrement, &Plan.getVectorTripCount()}, DL);
+ EB->appendRecipe(BranchBack);
+ }
}
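
Concretely, each unrolled part's entry mask has to start at lane Part * VF rather than at StartV, and the mask for the next iteration is derived from the IV after it has advanced by VF * UF. A sketch of just the lane arithmetic, assuming illustrative values VF = 4, UF = 2 and trip count 10 (not VPlan code):

#include <cstdio>

int main() {
  // Assumed illustrative values; the real VF/UF come from the cost model.
  const unsigned VF = 4, UF = 2, TC = 10;

  // Entry masks made in the preheader: part P covers lanes
  // [P * VF, P * VF + VF), each lane compared against the trip count.
  for (unsigned Part = 0; Part < UF; ++Part)
    for (unsigned Lane = 0; Lane < VF; ++Lane)
      printf("entry part %u lane %u active=%d\n", Part, Lane,
             Part * VF + Lane < TC);

  // After one iteration the canonical IV has advanced by VF * UF = 8, so
  // the next-iteration masks start at IV + Part * VF and lanes >= 10 go
  // inactive; the inverted mask feeds BranchOnCond, so the loop exits once
  // no lanes remain active.
  const unsigned IV = VF * UF;
  for (unsigned Part = 0; Part < UF; ++Part)
    for (unsigned Lane = 0; Lane < VF; ++Lane)
      printf("next part %u lane %u active=%d\n", Part, Lane,
             IV + Part * VF + Lane < TC);
}
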
// Add exit values to \p Plan. VPLiveOuts are added for each LCSSA phi in the
@@ -8691,7 +8769,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(),
DLInst ? DLInst->getDebugLoc() : DebugLoc(),
- !CM.foldTailByMasking());
+ !CM.foldTailByMasking(),
+ CM.useActiveLaneMaskForControlFlow());
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
@@ -8961,8 +9040,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE());
VPlanTransforms::sinkScalarOperands(*Plan);
- VPlanTransforms::mergeReplicateRegions(*Plan);
VPlanTransforms::removeDeadRecipes(*Plan);
+ VPlanTransforms::mergeReplicateRegions(*Plan);
VPlanTransforms::removeRedundantExpandSCEVRecipes(*Plan);
// Fold Exit block into its predecessor if possible.
@@ -9006,7 +9085,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
Term->eraseFromParent();
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(),
- true);
+ true, CM.useActiveLaneMaskForControlFlow());
return Plan;
}
@@ -9078,7 +9157,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
Plan->removeVPValueFor(R);
Plan->addVPValue(R, RedRecipe);
- WidenRecipe->getParent()->insert(RedRecipe, WidenRecipe->getIterator());
+ // Append the recipe to the end of the VPBasicBlock because we need to
+ // ensure that it comes after all of its inputs, including CondOp.
+ WidenRecipe->getParent()->appendRecipe(RedRecipe);
WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
WidenRecipe->eraseFromParent();
@@ -9151,229 +9232,6 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
*this, State);
}
-void VPWidenSelectRecipe::execute(VPTransformState &State) {
- auto &I = *cast<SelectInst>(getUnderlyingInstr());
- State.setDebugLocFromInst(&I);
-
- // The condition can be loop invariant but still defined inside the
- // loop. This means that we can't just use the original 'cond' value.
- // We have to take the 'vectorized' value and pick the first lane.
- // Instcombine will make this a no-op.
- auto *InvarCond =
- InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
- Value *Op0 = State.get(getOperand(1), Part);
- Value *Op1 = State.get(getOperand(2), Part);
- Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
- State.set(this, Sel, Part);
- State.addMetadata(Sel, &I);
- }
-}
-
-void VPWidenRecipe::execute(VPTransformState &State) {
- auto &I = *cast<Instruction>(getUnderlyingValue());
- auto &Builder = State.Builder;
- switch (I.getOpcode()) {
- case Instruction::Call:
- case Instruction::Br:
- case Instruction::PHI:
- case Instruction::GetElementPtr:
- case Instruction::Select:
- llvm_unreachable("This instruction is handled by a different recipe.");
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::URem:
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::FNeg:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // Just widen unops and binops.
- State.setDebugLocFromInst(&I);
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- SmallVector<Value *, 2> Ops;
- for (VPValue *VPOp : operands())
- Ops.push_back(State.get(VPOp, Part));
-
- Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
-
- if (auto *VecOp = dyn_cast<Instruction>(V)) {
- VecOp->copyIRFlags(&I);
-
- // If the instruction is vectorized and was in a basic block that needed
- // predication, we can't propagate poison-generating flags (nuw/nsw,
- // exact, etc.). The control flow has been linearized and the
- // instruction is no longer guarded by the predicate, which could make
- // the flag properties to no longer hold.
- if (State.MayGeneratePoisonRecipes.contains(this))
- VecOp->dropPoisonGeneratingFlags();
- }
-
- // Use this vector value for all users of the original instruction.
- State.set(this, V, Part);
- State.addMetadata(V, &I);
- }
-
- break;
- }
- case Instruction::Freeze: {
- State.setDebugLocFromInst(&I);
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *Op = State.get(getOperand(0), Part);
-
- Value *Freeze = Builder.CreateFreeze(Op);
- State.set(this, Freeze, Part);
- }
- break;
- }
- case Instruction::ICmp:
- case Instruction::FCmp: {
- // Widen compares. Generate vector compares.
- bool FCmp = (I.getOpcode() == Instruction::FCmp);
- auto *Cmp = cast<CmpInst>(&I);
- State.setDebugLocFromInst(Cmp);
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *A = State.get(getOperand(0), Part);
- Value *B = State.get(getOperand(1), Part);
- Value *C = nullptr;
- if (FCmp) {
- // Propagate fast math flags.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- Builder.setFastMathFlags(Cmp->getFastMathFlags());
- C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
- } else {
- C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
- }
- State.set(this, C, Part);
- State.addMetadata(C, &I);
- }
-
- break;
- }
-
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- auto *CI = cast<CastInst>(&I);
- State.setDebugLocFromInst(CI);
-
- /// Vectorize casts.
- Type *DestTy = (State.VF.isScalar())
- ? CI->getType()
- : VectorType::get(CI->getType(), State.VF);
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *A = State.get(getOperand(0), Part);
- Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
- State.set(this, Cast, Part);
- State.addMetadata(Cast, &I);
- }
- break;
- }
- default:
- // This instruction is not vectorized by simple widening.
- LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
- llvm_unreachable("Unhandled instruction!");
- } // end of switch.
-}
-
-void VPWidenGEPRecipe::execute(VPTransformState &State) {
- auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
- // Construct a vector GEP by widening the operands of the scalar GEP as
- // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
- // results in a vector of pointers when at least one operand of the GEP
- // is vector-typed. Thus, to keep the representation compact, we only use
- // vector-typed operands for loop-varying values.
-
- if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
- // If we are vectorizing, but the GEP has only loop-invariant operands,
- // the GEP we build (by only using vector-typed operands for
- // loop-varying values) would be a scalar pointer. Thus, to ensure we
- // produce a vector of pointers, we need to either arbitrarily pick an
- // operand to broadcast, or broadcast a clone of the original GEP.
- // Here, we broadcast a clone of the original.
- //
- // TODO: If at some point we decide to scalarize instructions having
- // loop-invariant operands, this special case will no longer be
- // required. We would add the scalarization decision to
- // collectLoopScalars() and teach getVectorValue() to broadcast
- // the lane-zero scalar value.
- auto *Clone = State.Builder.Insert(GEP->clone());
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
- State.set(this, EntryPart, Part);
- State.addMetadata(EntryPart, GEP);
- }
- } else {
- // If the GEP has at least one loop-varying operand, we are sure to
- // produce a vector of pointers. But if we are only unrolling, we want
- // to produce a scalar GEP for each unroll part. Thus, the GEP we
- // produce with the code below will be scalar (if VF == 1) or vector
- // (otherwise). Note that for the unroll-only case, we still maintain
- // values in the vector mapping with initVector, as we do for other
- // instructions.
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- // The pointer operand of the new GEP. If it's loop-invariant, we
- // won't broadcast it.
- auto *Ptr = IsPtrLoopInvariant
- ? State.get(getOperand(0), VPIteration(0, 0))
- : State.get(getOperand(0), Part);
-
- // Collect all the indices for the new GEP. If any index is
- // loop-invariant, we won't broadcast it.
- SmallVector<Value *, 4> Indices;
- for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
- VPValue *Operand = getOperand(I);
- if (IsIndexLoopInvariant[I - 1])
- Indices.push_back(State.get(Operand, VPIteration(0, 0)));
- else
- Indices.push_back(State.get(Operand, Part));
- }
-
- // If the GEP instruction is vectorized and was in a basic block that
- // needed predication, we can't propagate the poison-generating 'inbounds'
- // flag. The control flow has been linearized and the GEP is no longer
- // guarded by the predicate, which could make the 'inbounds' properties to
- // no longer hold.
- bool IsInBounds =
- GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
-
- // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
- // but it should be a vector, otherwise.
- auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
- Indices, "", IsInBounds);
- assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
- "NewGEP is not a pointer vector");
- State.set(this, NewGEP, Part);
- State.addMetadata(NewGEP, GEP);
- }
- }
-}
-
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Int or FP induction being replicated.");
@@ -9632,45 +9490,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
}
}
-void VPBlendRecipe::execute(VPTransformState &State) {
- State.setDebugLocFromInst(Phi);
- // We know that all PHIs in non-header blocks are converted into
- // selects, so we don't have to worry about the insertion order and we
- // can just use the builder.
- // At this point we generate the predication tree. There may be
- // duplications since this is a simple recursive scan, but future
- // optimizations will clean it up.
-
- unsigned NumIncoming = getNumIncomingValues();
-
- // Generate a sequence of selects of the form:
- // SELECT(Mask3, In3,
- // SELECT(Mask2, In2,
- // SELECT(Mask1, In1,
- // In0)))
- // Note that Mask0 is never used: lanes for which no path reaches this phi and
- // are essentially undef are taken from In0.
- InnerLoopVectorizer::VectorParts Entry(State.UF);
- for (unsigned In = 0; In < NumIncoming; ++In) {
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- // We might have single edge PHIs (blocks) - use an identity
- // 'select' for the first PHI operand.
- Value *In0 = State.get(getIncomingValue(In), Part);
- if (In == 0)
- Entry[Part] = In0; // Initialize with the first incoming value.
- else {
- // Select between the current value and the previous incoming edge
- // based on the incoming mask.
- Value *Cond = State.get(getMask(In), Part);
- Entry[Part] =
- State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
- }
- }
- }
- for (unsigned Part = 0; Part < State.UF; ++Part)
- State.set(this, Entry[Part], Part);
-}
-
void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Interleave group being replicated.");
State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(),
@@ -9758,32 +9577,6 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
State);
}
-void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
- assert(State.Instance && "Branch on Mask works only on single instance.");
-
- unsigned Part = State.Instance->Part;
- unsigned Lane = State.Instance->Lane.getKnownLane();
-
- Value *ConditionBit = nullptr;
- VPValue *BlockInMask = getMask();
- if (BlockInMask) {
- ConditionBit = State.get(BlockInMask, Part);
- if (ConditionBit->getType()->isVectorTy())
- ConditionBit = State.Builder.CreateExtractElement(
- ConditionBit, State.Builder.getInt32(Lane));
- } else // Block in mask is all-one.
- ConditionBit = State.Builder.getTrue();
-
- // Replace the temporary unreachable terminator with a new conditional branch,
- // whose two destinations will be set later when they are created.
- auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
- assert(isa<UnreachableInst>(CurrentTerminator) &&
- "Expected to replace unreachable terminator with conditional branch.");
- auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
- CondBr->setSuccessor(0, nullptr);
- ReplaceInstWithInst(CurrentTerminator, CondBr);
-}
-
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
assert(State.Instance && "Predicated instruction PHI works per instance.");
Instruction *ScalarPredInst =
@@ -10103,8 +9896,7 @@ static bool processLoopInVPlanNativePath(
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
// optional argument if we don't need it in the future.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE, Hints,
- Requirements, ORE);
+ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE, Hints, ORE);
// Get user vectorization factor.
ElementCount UserVF = Hints.getWidth();
@@ -10123,10 +9915,10 @@ static bool processLoopInVPlanNativePath(
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
{
- GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
+ GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
F->getParent()->getDataLayout());
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
- &CM, BFI, PSI, Checks);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
+ VF.Width, 1, LVL, &CM, BFI, PSI, Checks);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
@@ -10183,6 +9975,105 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
}
}
+static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
+ VectorizationFactor &VF,
+ Optional<unsigned> VScale, Loop *L,
+ ScalarEvolution &SE) {
+ InstructionCost CheckCost = Checks.getCost();
+ if (!CheckCost.isValid())
+ return false;
+
+ // When only interleaving, the scalar and vector costs will be equal, which
+ // in turn would lead to a divide by 0. Fall back to the hard threshold.
+ if (VF.Width.isScalar()) {
+ if (CheckCost > VectorizeMemoryCheckThreshold) {
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Interleaving only is not profitable due to runtime checks\n");
+ return false;
+ }
+ return true;
+ }
+
+ // The scalar cost should only be 0 when vectorizing with a user specified
+ // VF/IC. In those cases, runtime checks should always be generated.
+ double ScalarC = *VF.ScalarCost.getValue();
+ if (ScalarC == 0)
+ return true;
+
+ // First, compute the minimum iteration count required so that the vector
+ // loop outperforms the scalar loop.
+ // The total cost of the scalar loop is
+ // ScalarC * TC
+ // where
+ // * TC is the actual trip count of the loop.
+ // * ScalarC is the cost of a single scalar iteration.
+ //
+ // The total cost of the vector loop is
+ // RtC + VecC * (TC / VF) + EpiC
+ // where
+ // * RtC is the cost of the generated runtime checks
+ // * VecC is the cost of a single vector iteration.
+ // * TC is the actual trip count of the loop
+ // * VF is the vectorization factor
+ // * EpiC is the cost of the generated epilogue, including the cost
+ // of the remaining scalar operations.
+ //
+ // Vectorization is profitable once the total vector cost is less than the
+ // total scalar cost:
+ // RtC + VecC * (TC / VF) + EpiC < ScalarC * TC
+ //
+ // Now we can compute the minimum required trip count TC as
+ // (RtC + EpiC) / (ScalarC - (VecC / VF)) < TC
+ //
+ // For now we assume the epilogue cost EpiC = 0 for simplicity. Note that
+ // the computations are performed on doubles, not integers, and the result
+ // is rounded up, hence we get an upper estimate of the TC.
+ unsigned IntVF = VF.Width.getKnownMinValue();
+ if (VF.Width.isScalable()) {
+ unsigned AssumedMinimumVscale = 1;
+ if (VScale)
+ AssumedMinimumVscale = *VScale;
+ IntVF *= AssumedMinimumVscale;
+ }
+ double VecCOverVF = double(*VF.Cost.getValue()) / IntVF;
+ double RtC = *CheckCost.getValue();
+ double MinTC1 = RtC / (ScalarC - VecCOverVF);
+
+ // Second, compute a minimum iteration count so that the cost of the
+ // runtime checks is only a fraction of the total scalar loop cost. This
+ // adds a loop-dependent bound on the overhead incurred if the runtime
+ // checks fail. In case the runtime checks fail, the cost is RtC + ScalarC
+ // * TC. To bound the runtime check to be a fraction 1/X of the scalar
+ // cost, compute
+ // RtC < ScalarC * TC * (1 / X) ==> RtC * X / ScalarC < TC
+ double MinTC2 = RtC * 10 / ScalarC;
+
+ // Now pick the larger minimum. If it is not a multiple of VF, choose the
+ // next closest multiple of VF. This should partly compensate for ignoring
+ // the epilogue cost.
+ uint64_t MinTC = std::ceil(std::max(MinTC1, MinTC2));
+ VF.MinProfitableTripCount = ElementCount::getFixed(alignTo(MinTC, IntVF));
+
+ LLVM_DEBUG(
+ dbgs() << "LV: Minimum required TC for runtime checks to be profitable:"
+ << VF.MinProfitableTripCount << "\n");
+
+ // Skip vectorization if the expected trip count is less than the minimum
+ // required trip count.
+ if (auto ExpectedTC = getSmallBestKnownTC(SE, L)) {
+ if (ElementCount::isKnownLT(ElementCount::getFixed(*ExpectedTC),
+ VF.MinProfitableTripCount)) {
+ LLVM_DEBUG(dbgs() << "LV: Vectorization is not beneficial: expected "
+ "trip count < minimum profitable VF ("
+ << *ExpectedTC << " < " << VF.MinProfitableTripCount
+ << ")\n");
+
+ return false;
+ }
+ }
+ return true;
+}
+
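As a worked instance of the two bounds above, with all cost values assumed for illustration: ScalarC = 4, VecC = 8, VF = 4 and RtC = 20 give MinTC1 = 20 / (4 - 2) = 10 and MinTC2 = 20 * 10 / 4 = 50; rounding the larger bound up to a multiple of VF yields 52. A self-contained sketch of that computation:

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

// All cost values here are assumed examples, not real cost-model output.
static uint64_t minProfitableTC(double ScalarC, double VecC, unsigned VF,
                                double RtC) {
  double MinTC1 = RtC / (ScalarC - VecC / VF); // break-even vs. scalar loop
  double MinTC2 = RtC * 10 / ScalarC;          // cap check overhead at 1/10
  uint64_t MinTC = std::ceil(std::max(MinTC1, MinTC2));
  return (MinTC + VF - 1) / VF * VF;           // round up to a VF multiple
}

int main() { assert(minProfitableTC(4.0, 8.0, 4, 20.0) == 52); }
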
LoopVectorizePass::LoopVectorizePass(LoopVectorizeOptions Opts)
: InterleaveOnlyWhenForced(Opts.InterleaveOnlyWhenForced ||
!EnableLoopInterleaving),
@@ -10340,8 +10231,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
CM.collectElementTypesForWidening();
// Use the planner for vectorization.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE, Hints,
- Requirements, ORE);
+ LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE, Hints, ORE);
// Get user vectorization factor and interleave count.
ElementCount UserVF = Hints.getWidth();
@@ -10353,10 +10243,25 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VectorizationFactor VF = VectorizationFactor::Disabled();
unsigned IC = 1;
- GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
+ GeneratedRTChecks Checks(*PSE.getSE(), DT, LI, TTI,
F->getParent()->getDataLayout());
if (MaybeVF) {
- if (LVP.requiresTooManyRuntimeChecks()) {
+ VF = *MaybeVF;
+ // Select the interleave count.
+ IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
+
+ unsigned SelectedIC = std::max(IC, UserIC);
+ // Optimistically generate runtime checks if they are needed. Drop them if
+ // they turn out to not be profitable.
+ if (VF.Width.isVector() || SelectedIC > 1)
+ Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
+
+ // Check if it is profitable to vectorize with runtime checks.
+ bool ForceVectorization =
+ Hints.getForce() == LoopVectorizeHints::FK_Enabled;
+ if (!ForceVectorization &&
+ !areRuntimeChecksProfitable(Checks, VF, CM.getVScaleForTuning(), L,
+ *PSE.getSE())) {
ORE->emit([&]() {
return OptimizationRemarkAnalysisAliasing(
DEBUG_TYPE, "CantReorderMemOps", L->getStartLoc(),
@@ -10368,15 +10273,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
Hints.emitRemarkWithHints();
return false;
}
- VF = *MaybeVF;
- // Select the interleave count.
- IC = CM.selectInterleaveCount(VF.Width, *VF.Cost.getValue());
-
- unsigned SelectedIC = std::max(IC, UserIC);
- // Optimistically generate runtime checks if they are needed. Drop them if
- // they turn out to not be profitable.
- if (VF.Width.isVector() || SelectedIC > 1)
- Checks.Create(L, *LVL.getLAI(), PSE.getPredicate(), VF.Width, SelectedIC);
}
// Identify the diagnostic messages that should be produced.
@@ -10533,8 +10429,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (!MainILV.areSafetyChecksAdded())
DisableRuntimeUnroll = true;
} else {
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
- &LVL, &CM, BFI, PSI, Checks);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
+ VF.MinProfitableTripCount, IC, &LVL, &CM, BFI,
+ PSI, Checks);
VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
@@ -10564,7 +10461,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupEpilogue});
if (RemainderLoopID) {
- L->setLoopID(RemainderLoopID.getValue());
+ L->setLoopID(RemainderLoopID.value());
} else {
if (DisableRuntimeUnroll)
AddRuntimeUnrollDisableMetaData(L);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 019a09665a67..e136cd9aedac 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2637,7 +2637,7 @@ private:
AliasCacheKey key = std::make_pair(Inst1, Inst2);
Optional<bool> &result = AliasCache[key];
if (result) {
- return result.getValue();
+ return result.value();
}
bool aliased = true;
if (Loc1.Ptr && isSimple(Inst1))
@@ -4592,7 +4592,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
};
InstructionsState S = getSameOpcode(VL);
- if (Depth == RecursionMaxDepth) {
+
+ // Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of
+ // a load), in which case peek through to include it in the tree, without
+ // ballooning over-budget.
+ if (Depth >= RecursionMaxDepth &&
+ !(S.MainOp && isa<Instruction>(S.MainOp) && S.MainOp == S.AltOp &&
+ VL.size() >= 4 &&
+ (match(S.MainOp, m_Load(m_Value())) || all_of(VL, [&S](const Value *I) {
+ return match(I,
+ m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) &&
+ cast<Instruction>(I)->getOpcode() ==
+ cast<Instruction>(S.MainOp)->getOpcode();
+ })))) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
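
A standalone model of the new depth gate, simplified from the PatternMatch condition above (the real code matches IR values; the enum here is purely illustrative): past the budget, recursion continues only for a bundle of at least four values that share one opcode and are all loads, or all one-use zext/sext of one-use loads.

#include <cassert>
#include <vector>

enum class Op { Load, ZExtOfLoad, SExtOfLoad, Add };

static bool keepRecursingPastBudget(unsigned Depth, unsigned MaxDepth,
                                    const std::vector<Op> &Bundle) {
  if (Depth < MaxDepth)
    return true; // still within the recursion budget
  if (Bundle.size() < 4)
    return false; // too small a bundle to be worth peeking through
  Op First = Bundle.front();
  for (Op O : Bundle) {
    if (O != First)
      return false; // the bundle must share a single opcode
    if (O != Op::Load && O != Op::ZExtOfLoad && O != Op::SExtOfLoad)
      return false; // only loads and extended loads qualify
  }
  return true;
}

int main() {
  std::vector<Op> Loads(4, Op::Load), Adds(4, Op::Add);
  assert(keepRecursingPastBudget(12, 12, Loads));
  assert(!keepRecursingPastBudget(12, 12, Adds));
}
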
@@ -11217,7 +11229,7 @@ public:
return OptimizationRemarkMissed(
SV_NAME, "HorSLPNotBeneficial",
ReducedValsToOps.find(VL[0])->second.front())
- << "Vectorizing horizontal reduction is possible"
+ << "Vectorizing horizontal reduction is possible "
<< "but not beneficial with cost " << ore::NV("Cost", Cost)
<< " and threshold "
<< ore::NV("Threshold", -SLPCostThreshold);
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 97f2b1a93815..c7949c42c03e 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -159,7 +159,8 @@ public:
/// Create a replicating region for instruction \p I that requires
/// predication. \p PredRecipe is a VPReplicateRecipe holding \p I.
- VPRegionBlock *createReplicateRegion(Instruction *I, VPRecipeBase *PredRecipe,
+ VPRegionBlock *createReplicateRegion(Instruction *I,
+ VPReplicateRecipe *PredRecipe,
VPlanPtr &Plan);
/// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 4d709097c306..30032dda7f60 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -248,25 +248,27 @@ void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
}
void VPTransformState::setDebugLocFromInst(const Value *V) {
- if (const Instruction *Inst = dyn_cast_or_null<Instruction>(V)) {
- const DILocation *DIL = Inst->getDebugLoc();
-
- // When a FSDiscriminator is enabled, we don't need to add the multiply
- // factors to the discriminators.
- if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
- !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
- // FIXME: For scalable vectors, assume vscale=1.
- auto NewDIL =
- DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
- if (NewDIL)
- Builder.SetCurrentDebugLocation(*NewDIL);
- else
- LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
- << DIL->getFilename() << " Line: " << DIL->getLine());
- } else
- Builder.SetCurrentDebugLocation(DIL);
- } else
+ const Instruction *Inst = dyn_cast<Instruction>(V);
+ if (!Inst) {
Builder.SetCurrentDebugLocation(DebugLoc());
+ return;
+ }
+
+ const DILocation *DIL = Inst->getDebugLoc();
+ // When a FSDiscriminator is enabled, we don't need to add the multiply
+ // factors to the discriminators.
+ if (DIL && Inst->getFunction()->isDebugInfoForProfiling() &&
+ !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
+ // FIXME: For scalable vectors, assume vscale=1.
+ auto NewDIL =
+ DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
+ if (NewDIL)
+ Builder.SetCurrentDebugLocation(*NewDIL);
+ else
+ LLVM_DEBUG(dbgs() << "Failed to create new discriminator: "
+ << DIL->getFilename() << " Line: " << DIL->getLine());
+ } else
+ Builder.SetCurrentDebugLocation(DIL);
}
BasicBlock *
@@ -566,6 +568,24 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
+VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() {
+ VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
+ for (VPRecipeBase &R : Header->phis()) {
+ if (isa<VPActiveLaneMaskPHIRecipe>(&R))
+ return cast<VPActiveLaneMaskPHIRecipe>(&R);
+ }
+ return nullptr;
+}
+
+static bool canSimplifyBranchOnCond(VPInstruction *Term) {
+ VPInstruction *Not = dyn_cast<VPInstruction>(Term->getOperand(0));
+ if (!Not || Not->getOpcode() != VPInstruction::Not)
+ return false;
+
+ VPInstruction *ALM = dyn_cast<VPInstruction>(Not->getOperand(0));
+ return ALM && ALM->getOpcode() == VPInstruction::ActiveLaneMask;
+}
+
void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
Value *CanonicalIVStartValue,
VPTransformState &State,
@@ -573,11 +593,15 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
VPBasicBlock *ExitingVPBB = getVectorLoopRegion()->getExitingBasicBlock();
auto *Term = dyn_cast<VPInstruction>(&ExitingVPBB->back());
- // Try to simplify BranchOnCount to 'BranchOnCond true' if TC <= VF * UF when
- // preparing to execute the plan for the main vector loop.
- if (!IsEpilogueVectorization && Term &&
- Term->getOpcode() == VPInstruction::BranchOnCount &&
- isa<ConstantInt>(TripCountV)) {
+ // Try to simplify the branch condition if TC <= VF * UF when preparing to
+ // execute the plan for the main vector loop. We only do this if the
+ // terminator is:
+ // 1. BranchOnCount, or
+ // 2. BranchOnCond where the input is Not(ActiveLaneMask).
+ if (!IsEpilogueVectorization && Term && isa<ConstantInt>(TripCountV) &&
+ (Term->getOpcode() == VPInstruction::BranchOnCount ||
+ (Term->getOpcode() == VPInstruction::BranchOnCond &&
+ canSimplifyBranchOnCond(Term)))) {
ConstantInt *C = cast<ConstantInt>(TripCountV);
uint64_t TCVal = C->getZExtValue();
if (TCVal && TCVal <= State.VF.getKnownMinValue() * State.UF) {
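
For example, with assumed values TCVal = 8, VF = 4 and UF = 2, the entire trip count fits in a single vector iteration, so the guard above fires and the terminator can be folded. A tiny sketch of the guard:

#include <cstdio>
#include <initializer_list>

int main() {
  // Assumed example values: VF * UF lanes are executed per vector iteration.
  const unsigned VF = 4, UF = 2;
  for (unsigned TCVal : {0u, 7u, 8u, 9u})
    printf("TC=%u simplify=%d\n", TCVal, TCVal && TCVal <= VF * UF);
}
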
@@ -697,7 +721,8 @@ void VPlan::execute(VPTransformState *State) {
// generated.
bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
- cast<VPReductionPHIRecipe>(PhiR)->isOrdered();
+ (isa<VPReductionPHIRecipe>(PhiR) &&
+ cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 09da4a545d0d..f009a7ee6b4b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -784,6 +784,10 @@ public:
ActiveLaneMask,
CanonicalIVIncrement,
CanonicalIVIncrementNUW,
+ // The next two are similar to the above, but instead increment the
+ // canonical IV separately for each unrolled part.
+ CanonicalIVIncrementForPart,
+ CanonicalIVIncrementForPartNUW,
BranchOnCount,
BranchOnCond
};
@@ -794,6 +798,9 @@ private:
FastMathFlags FMF;
DebugLoc DL;
+ /// An optional name that can be used for the generated IR instruction.
+ const std::string Name;
+
/// Utility method serving execute(): generates a single instance of the
/// modeled instruction.
void generateInstruction(VPTransformState &State, unsigned Part);
@@ -802,14 +809,15 @@ protected:
void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
public:
- VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL)
+ VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands, DebugLoc DL,
+ const Twine &Name = "")
: VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands),
VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode),
- DL(DL) {}
+ DL(DL), Name(Name.str()) {}
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
- DebugLoc DL = {})
- : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL) {}
+ DebugLoc DL = {}, const Twine &Name = "")
+ : VPInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name) {}
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPValue *V) {
@@ -818,7 +826,7 @@ public:
VPInstruction *clone() const {
SmallVector<VPValue *, 2> Operands(operands());
- return new VPInstruction(Opcode, Operands, DL);
+ return new VPInstruction(Opcode, Operands, DL, Name);
}
/// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -897,6 +905,8 @@ public:
case VPInstruction::ActiveLaneMask:
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementNUW:
+ case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::CanonicalIVIncrementForPartNUW:
case VPInstruction::BranchOnCount:
return true;
};
@@ -1125,6 +1135,7 @@ public:
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *B) {
return B->getVPDefID() == VPRecipeBase::VPCanonicalIVPHISC ||
+ B->getVPDefID() == VPRecipeBase::VPActiveLaneMaskPHISC ||
B->getVPDefID() == VPRecipeBase::VPFirstOrderRecurrencePHISC ||
B->getVPDefID() == VPRecipeBase::VPReductionPHISC ||
B->getVPDefID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
@@ -1132,6 +1143,7 @@ public:
}
static inline bool classof(const VPValue *V) {
return V->getVPValueID() == VPValue::VPVCanonicalIVPHISC ||
+ V->getVPValueID() == VPValue::VPVActiveLaneMaskPHISC ||
V->getVPValueID() == VPValue::VPVFirstOrderRecurrencePHISC ||
V->getVPValueID() == VPValue::VPVReductionPHISC ||
V->getVPValueID() == VPValue::VPVWidenIntOrFpInductionSC ||
@@ -1861,6 +1873,42 @@ public:
}
};
+/// A recipe for generating the active lane mask for the vector loop that is
+/// used to predicate the vector operations.
+/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
+/// remove VPActiveLaneMaskPHIRecipe.
+class VPActiveLaneMaskPHIRecipe : public VPHeaderPHIRecipe {
+ DebugLoc DL;
+
+public:
+ VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
+ : VPHeaderPHIRecipe(VPValue::VPVActiveLaneMaskPHISC,
+ VPActiveLaneMaskPHISC, nullptr, StartMask),
+ DL(DL) {}
+
+ ~VPActiveLaneMaskPHIRecipe() override = default;
+
+ /// Method to support type inquiry through isa, cast, and dyn_cast.
+ static inline bool classof(const VPDef *D) {
+ return D->getVPDefID() == VPActiveLaneMaskPHISC;
+ }
+ static inline bool classof(const VPHeaderPHIRecipe *D) {
+ return D->getVPDefID() == VPActiveLaneMaskPHISC;
+ }
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPVActiveLaneMaskPHISC;
+ }
+
+ /// Generate the active lane mask phi of the vector loop.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
public:
@@ -2656,6 +2704,10 @@ public:
return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
}
+ /// Find and return the VPActiveLaneMaskPHIRecipe from the header - there
+ /// can be at most one. If there isn't one, return nullptr.
+ VPActiveLaneMaskPHIRecipe *getActiveLaneMaskPhi();
+
void addLiveOut(PHINode *PN, VPValue *V);
void clearLiveOuts() {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 92422b17457c..fdd901a4a70d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -26,13 +26,19 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
using namespace llvm;
+using VectorParts = SmallVector<Value *, 2>;
+
extern cl::opt<bool> EnableVPlanNativePath;
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+
bool VPRecipeBase::mayWriteToMemory() const {
switch (getVPDefID()) {
case VPWidenMemoryInstructionSC: {
@@ -186,7 +192,8 @@ void VPInstruction::generateInstruction(VPTransformState &State,
if (Instruction::isBinaryOp(getOpcode())) {
Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
- Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
+ Value *V =
+ Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
State.set(this, V, Part);
return;
}
@@ -194,14 +201,14 @@ void VPInstruction::generateInstruction(VPTransformState &State,
switch (getOpcode()) {
case VPInstruction::Not: {
Value *A = State.get(getOperand(0), Part);
- Value *V = Builder.CreateNot(A);
+ Value *V = Builder.CreateNot(A, Name);
State.set(this, V, Part);
break;
}
case VPInstruction::ICmpULE: {
Value *IV = State.get(getOperand(0), Part);
Value *TC = State.get(getOperand(1), Part);
- Value *V = Builder.CreateICmpULE(IV, TC);
+ Value *V = Builder.CreateICmpULE(IV, TC, Name);
State.set(this, V, Part);
break;
}
@@ -209,7 +216,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
Value *Cond = State.get(getOperand(0), Part);
Value *Op1 = State.get(getOperand(1), Part);
Value *Op2 = State.get(getOperand(2), Part);
- Value *V = Builder.CreateSelect(Cond, Op1, Op2);
+ Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
State.set(this, V, Part);
break;
}
@@ -223,7 +230,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
auto *PredTy = VectorType::get(Int1Ty, State.VF);
Instruction *Call = Builder.CreateIntrinsic(
Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
- {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
+ {VIVElem0, ScalarTC}, nullptr, Name);
State.set(this, Call, Part);
break;
}
@@ -247,7 +254,8 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.set(this, PartMinus1, Part);
} else {
Value *V2 = State.get(getOperand(1), Part);
- State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
+ State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
+ Part);
}
break;
}
@@ -261,7 +269,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
// elements) times the unroll factor (num of SIMD instructions).
Value *Step =
createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
- Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
+ Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
} else {
Next = State.get(this, 0);
}
@@ -269,6 +277,23 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.set(this, Next, Part);
break;
}
+
+ case VPInstruction::CanonicalIVIncrementForPart:
+ case VPInstruction::CanonicalIVIncrementForPartNUW: {
+ bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
+ auto *IV = State.get(getOperand(0), VPIteration(0, 0));
+ if (Part == 0) {
+ State.set(this, IV, Part);
+ break;
+ }
+
+ // The canonical IV is incremented by the vectorization factor (num of SIMD
+ // elements) times the unroll part.
+ Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
+ Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
+ State.set(this, Next, Part);
+ break;
+ }
case VPInstruction::BranchOnCond: {
if (Part != 0)
break;
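
The per-part semantics of the CanonicalIVIncrementForPart case above are: part 0 passes the incoming IV through unchanged, and part P yields IV + P * VF (scaled by vscale for scalable VFs). A minimal sketch for fixed-width VFs, with assumed VF = 4:

#include <cassert>

// Models CanonicalIVIncrementForPart for fixed-width VF: part 0 passes
// the IV through, part P yields IV + P * VF.
static unsigned ivForPart(unsigned IV, unsigned VF, unsigned Part) {
  return Part == 0 ? IV : IV + Part * VF;
}

int main() {
  const unsigned VF = 4;
  assert(ivForPart(0, VF, 0) == 0);  // preheader: entry mask for part 0
  assert(ivForPart(0, VF, 1) == 4);  // entry mask for part 1 starts at VF
  assert(ivForPart(8, VF, 2) == 16); // latch: next-iteration mask, part 2
}
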
@@ -370,6 +395,12 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::BranchOnCond:
O << "branch-on-cond";
break;
+ case VPInstruction::CanonicalIVIncrementForPart:
+ O << "VF * Part + ";
+ break;
+ case VPInstruction::CanonicalIVIncrementForPartNUW:
+ O << "VF * Part +(nuw) ";
+ break;
case VPInstruction::BranchOnCount:
O << "branch-on-count ";
break;
@@ -431,7 +462,158 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
getOperand(2)->printAsOperand(O, SlotTracker);
O << (InvariantCond ? " (condition is loop invariant)" : "");
}
+#endif
+
+void VPWidenSelectRecipe::execute(VPTransformState &State) {
+ auto &I = *cast<SelectInst>(getUnderlyingInstr());
+ State.setDebugLocFromInst(&I);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ auto *InvarCond =
+ InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
+ Value *Op0 = State.get(getOperand(1), Part);
+ Value *Op1 = State.get(getOperand(2), Part);
+ Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
+ State.set(this, Sel, Part);
+ State.addMetadata(Sel, &I);
+ }
+}
+
+void VPWidenRecipe::execute(VPTransformState &State) {
+ auto &I = *cast<Instruction>(getUnderlyingValue());
+ auto &Builder = State.Builder;
+ switch (I.getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Br:
+ case Instruction::PHI:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ llvm_unreachable("This instruction is handled by a different recipe.");
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::FNeg:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen unops and binops.
+ State.setDebugLocFromInst(&I);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ SmallVector<Value *, 2> Ops;
+ for (VPValue *VPOp : operands())
+ Ops.push_back(State.get(VPOp, Part));
+
+ Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
+
+ if (auto *VecOp = dyn_cast<Instruction>(V)) {
+ VecOp->copyIRFlags(&I);
+
+ // If the instruction is vectorized and was in a basic block that needed
+ // predication, we can't propagate poison-generating flags (nuw/nsw,
+ // exact, etc.). The control flow has been linearized and the
+ // instruction is no longer guarded by the predicate, which could make
+ // the flag properties no longer hold.
+ if (State.MayGeneratePoisonRecipes.contains(this))
+ VecOp->dropPoisonGeneratingFlags();
+ }
+
+ // Use this vector value for all users of the original instruction.
+ State.set(this, V, Part);
+ State.addMetadata(V, &I);
+ }
+
+ break;
+ }
+ case Instruction::Freeze: {
+ State.setDebugLocFromInst(&I);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *Op = State.get(getOperand(0), Part);
+
+ Value *Freeze = Builder.CreateFreeze(Op);
+ State.set(this, Freeze, Part);
+ }
+ break;
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Widen compares. Generate vector compares.
+ bool FCmp = (I.getOpcode() == Instruction::FCmp);
+ auto *Cmp = cast<CmpInst>(&I);
+ State.setDebugLocFromInst(Cmp);
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *B = State.get(getOperand(1), Part);
+ Value *C = nullptr;
+ if (FCmp) {
+ // Propagate fast math flags.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ Builder.setFastMathFlags(Cmp->getFastMathFlags());
+ C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+ } else {
+ C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+ }
+ State.set(this, C, Part);
+ State.addMetadata(C, &I);
+ }
+ break;
+ }
+
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ auto *CI = cast<CastInst>(&I);
+ State.setDebugLocFromInst(CI);
+
+ // Vectorize casts.
+ Type *DestTy = (State.VF.isScalar())
+ ? CI->getType()
+ : VectorType::get(CI->getType(), State.VF);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+ State.set(this, Cast, Part);
+ State.addMetadata(Cast, &I);
+ }
+ break;
+ }
+ default:
+ // This instruction is not vectorized by simple widening.
+ LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
+ llvm_unreachable("Unhandled instruction!");
+ } // end of switch.
+}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
@@ -487,7 +669,82 @@ void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent << "= SCALAR-STEPS ";
printOperands(O, SlotTracker);
}
+#endif
+
+void VPWidenGEPRecipe::execute(VPTransformState &State) {
+ auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
+ // Construct a vector GEP by widening the operands of the scalar GEP as
+ // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
+ // results in a vector of pointers when at least one operand of the GEP
+ // is vector-typed. Thus, to keep the representation compact, we only use
+ // vector-typed operands for loop-varying values.
+
+ if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
+ // If we are vectorizing, but the GEP has only loop-invariant operands,
+ // the GEP we build (by only using vector-typed operands for
+ // loop-varying values) would be a scalar pointer. Thus, to ensure we
+ // produce a vector of pointers, we need to either arbitrarily pick an
+ // operand to broadcast, or broadcast a clone of the original GEP.
+ // Here, we broadcast a clone of the original.
+ //
+ // TODO: If at some point we decide to scalarize instructions having
+ // loop-invariant operands, this special case will no longer be
+ // required. We would add the scalarization decision to
+ // collectLoopScalars() and teach getVectorValue() to broadcast
+ // the lane-zero scalar value.
+ auto *Clone = State.Builder.Insert(GEP->clone());
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
+ State.set(this, EntryPart, Part);
+ State.addMetadata(EntryPart, GEP);
+ }
+ } else {
+ // If the GEP has at least one loop-varying operand, we are sure to
+ // produce a vector of pointers. But if we are only unrolling, we want
+ // to produce a scalar GEP for each unroll part. Thus, the GEP we
+ // produce with the code below will be scalar (if VF == 1) or vector
+ // (otherwise). Note that for the unroll-only case, we still maintain
+ // values in the vector mapping with initVector, as we do for other
+ // instructions.
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ // The pointer operand of the new GEP. If it's loop-invariant, we
+ // won't broadcast it.
+ auto *Ptr = IsPtrLoopInvariant
+ ? State.get(getOperand(0), VPIteration(0, 0))
+ : State.get(getOperand(0), Part);
+
+ // Collect all the indices for the new GEP. If any index is
+ // loop-invariant, we won't broadcast it.
+ SmallVector<Value *, 4> Indices;
+ for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
+ VPValue *Operand = getOperand(I);
+ if (IsIndexLoopInvariant[I - 1])
+ Indices.push_back(State.get(Operand, VPIteration(0, 0)));
+ else
+ Indices.push_back(State.get(Operand, Part));
+ }
+
+ // If the GEP instruction is vectorized and was in a basic block that
+ // needed predication, we can't propagate the poison-generating 'inbounds'
+ // flag. The control flow has been linearized and the GEP is no longer
+ // guarded by the predicate, which could make the 'inbounds' property
+ // no longer hold.
+ bool IsInBounds =
+ GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
+
+ // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
+ // but it should be a vector, otherwise.
+ auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
+ Indices, "", IsInBounds);
+ assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
+ "NewGEP is not a pointer vector");
+ State.set(this, NewGEP, Part);
+ State.addMetadata(NewGEP, GEP);
+ }
+ }
+}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-GEP ";
@@ -501,7 +758,48 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = getelementptr ";
printOperands(O, SlotTracker);
}
+#endif
+void VPBlendRecipe::execute(VPTransformState &State) {
+ State.setDebugLocFromInst(Phi);
+ // We know that all PHIs in non-header blocks are converted into
+ // selects, so we don't have to worry about the insertion order and we
+ // can just use the builder.
+ // At this point we generate the predication tree. There may be
+ // duplications since this is a simple recursive scan, but future
+ // optimizations will clean it up.
+
+ unsigned NumIncoming = getNumIncomingValues();
+
+ // Generate a sequence of selects of the form:
+ // SELECT(Mask3, In3,
+ // SELECT(Mask2, In2,
+ // SELECT(Mask1, In1,
+ // In0)))
+ // Note that Mask0 is never used: lanes for which no path reaches this phi
+ // (and which are therefore essentially undef) are taken from In0.
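+ // For example (illustrative), with three incoming values each part gets:
+ //   %sel = select <VF x i1> %mask1, <VF x ty> %in1, <VF x ty> %in0
+ //   %predphi = select <VF x i1> %mask2, <VF x ty> %in2, <VF x ty> %sel
+ // (the value names here are made up; the code below names each select
+ // "predphi").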
+ VectorParts Entry(State.UF);
+ for (unsigned In = 0; In < NumIncoming; ++In) {
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ // We might have single-edge PHIs (blocks); use an identity
+ // 'select' for the first PHI operand.
+ Value *In0 = State.get(getIncomingValue(In), Part);
+ if (In == 0)
+ Entry[Part] = In0; // Initialize with the first incoming value.
+ else {
+ // Select between the current value and the previous incoming edge
+ // based on the incoming mask.
+ Value *Cond = State.get(getMask(In), Part);
+ Entry[Part] =
+ State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi");
+ }
+ }
+ }
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ State.set(this, Entry[Part], Part);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "BLEND ";
@@ -566,7 +864,35 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
if (AlsoPack)
O << " (S->V)";
}
+#endif
+void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
+ assert(State.Instance && "Branch on Mask works only on a single instance.");
+
+ unsigned Part = State.Instance->Part;
+ unsigned Lane = State.Instance->Lane.getKnownLane();
+
+ Value *ConditionBit = nullptr;
+ VPValue *BlockInMask = getMask();
+ if (BlockInMask) {
+ ConditionBit = State.get(BlockInMask, Part);
+ if (ConditionBit->getType()->isVectorTy())
+ ConditionBit = State.Builder.CreateExtractElement(
+ ConditionBit, State.Builder.getInt32(Lane));
+ } else // Block-in mask is all ones.
+ ConditionBit = State.Builder.getTrue();
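+
+ // E.g. (illustrative): for Instance (Part=0, Lane=2) with a vector mask,
+ // the code above emits 'extractelement <VF x i1> %mask, i32 2' to obtain
+ // the scalar condition bit.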
+
+ // Replace the temporary unreachable terminator with a new conditional branch,
+ // whose two destinations will be set later when they are created.
+ auto *CurrentTerminator = State.CFG.PrevBB->getTerminator();
+ assert(isa<UnreachableInst>(CurrentTerminator) &&
+ "Expected to replace unreachable terminator with conditional branch.");
+ auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit);
+ CondBr->setSuccessor(0, nullptr);
+ ReplaceInstWithInst(CurrentTerminator, CondBr);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "PHI-PREDICATED-INSTRUCTION ";
@@ -838,3 +1164,28 @@ void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
printOperands(O, SlotTracker);
}
#endif
+
+// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
+// remove VPActiveLaneMaskPHIRecipe.
+void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
+ BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
+ for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
+ Value *StartMask = State.get(getOperand(0), Part);
+ PHINode *EntryPart =
+ State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask");
+ EntryPart->addIncoming(StartMask, VectorPH);
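+ // Only the preheader incoming value is added here; the incoming value for
+ // the loop backedge is presumably filled in later, once the mask for the
+ // next iteration exists.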
+ EntryPart->setDebugLoc(DL);
+ State.set(this, EntryPart, Part);
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "ACTIVE-LANE-MASK-PHI ";
+
+ printAsOperand(O, SlotTracker);
+ O << " = phi ";
+ printOperands(O, SlotTracker);
+}
+#endif
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 5fc676834331..c99fae1b2ab4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -103,6 +103,7 @@ public:
// Phi-like VPValues. Need to be kept together.
VPVBlendSC,
VPVCanonicalIVPHISC,
+ VPVActiveLaneMaskPHISC,
VPVFirstOrderRecurrencePHISC,
VPVWidenPHISC,
VPVWidenIntOrFpInductionSC,
@@ -358,6 +359,7 @@ public:
// Phi-like recipes. Need to be kept together.
VPBlendSC,
VPCanonicalIVPHISC,
+ VPActiveLaneMaskPHISC,
VPFirstOrderRecurrencePHISC,
VPWidenPHISC,
VPWidenIntOrFpInductionSC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index f917883145c0..3501de6ab38e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -133,32 +133,48 @@ void VPlanVerifier::verifyHierarchicalCFG(
verifyRegionRec(TopRegion);
}
-bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
- auto Iter = depth_first(
- VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
- for (const VPBasicBlock *VPBB :
- VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) {
- // Verify that phi-like recipes are at the beginning of the block, with no
- // other recipes in between.
- auto RecipeI = VPBB->begin();
- auto End = VPBB->end();
- while (RecipeI != End && RecipeI->isPhi())
- RecipeI++;
+static bool verifyVPBasicBlock(const VPBasicBlock *VPBB) {
+ // Verify that phi-like recipes are at the beginning of the block, with no
+ // other recipes in between.
+ auto RecipeI = VPBB->begin();
+ auto End = VPBB->end();
+ unsigned NumActiveLaneMaskPhiRecipes = 0;
+ while (RecipeI != End && RecipeI->isPhi()) {
+ if (isa<VPActiveLaneMaskPHIRecipe>(RecipeI))
+ NumActiveLaneMaskPhiRecipes++;
+ RecipeI++;
+ }
- while (RecipeI != End) {
- if (RecipeI->isPhi() && !isa<VPBlendRecipe>(&*RecipeI)) {
- errs() << "Found phi-like recipe after non-phi recipe";
+ if (NumActiveLaneMaskPhiRecipes > 1) {
+ errs() << "There should be no more than one VPActiveLaneMaskPHIRecipe";
+ return false;
+ }
+
+ while (RecipeI != End) {
+ if (RecipeI->isPhi() && !isa<VPBlendRecipe>(&*RecipeI)) {
+ errs() << "Found phi-like recipe after non-phi recipe";
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- errs() << ": ";
- RecipeI->dump();
- errs() << "after\n";
- std::prev(RecipeI)->dump();
+ errs() << ": ";
+ RecipeI->dump();
+ errs() << "after\n";
+ std::prev(RecipeI)->dump();
#endif
- return false;
- }
- RecipeI++;
+ return false;
}
+ RecipeI++;
+ }
+
+ return true;
+}
+
+bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
+ auto Iter = depth_first(
+ VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
+ for (const VPBasicBlock *VPBB :
+ VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) {
+ if (!verifyVPBasicBlock(VPBB))
+ return false;
}
const VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
@@ -181,15 +197,16 @@ bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
}
if (Exiting->empty()) {
- errs() << "VPlan vector loop exiting block must end with BranchOnCount "
- "VPInstruction but is empty\n";
+ errs() << "VPlan vector loop exiting block must end with BranchOnCount or "
+ "BranchOnCond VPInstruction but is empty\n";
return false;
}
auto *LastInst = dyn_cast<VPInstruction>(std::prev(Exiting->end()));
- if (!LastInst || LastInst->getOpcode() != VPInstruction::BranchOnCount) {
- errs() << "VPlan vector loop exit must end with BranchOnCount "
- "VPInstruction\n";
+ if (!LastInst || (LastInst->getOpcode() != VPInstruction::BranchOnCount &&
+ LastInst->getOpcode() != VPInstruction::BranchOnCond)) {
+ errs() << "VPlan vector loop exit must end with BranchOnCount or "
+ "BranchOnCond VPInstruction\n";
return false;
}
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 90598937affc..d12624ffb824 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -414,6 +414,10 @@ static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
unsigned NewIndex,
IRBuilder<> &Builder) {
+ // Shufflevectors can only be created for fixed-width vectors.
+ if (!isa<FixedVectorType>(ExtElt->getOperand(0)->getType()))
+ return nullptr;
+
// If the extract can be constant-folded, this code is unsimplified. Defer
// to other passes to handle that.
Value *X = ExtElt->getVectorOperand();
@@ -1249,14 +1253,20 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
VT != Op0->getType())
return false;
- auto *SVI0A = dyn_cast<ShuffleVectorInst>(Op0->getOperand(0));
- auto *SVI0B = dyn_cast<ShuffleVectorInst>(Op0->getOperand(1));
- auto *SVI1A = dyn_cast<ShuffleVectorInst>(Op1->getOperand(0));
- auto *SVI1B = dyn_cast<ShuffleVectorInst>(Op1->getOperand(1));
+ auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
+ auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
+ auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
+ auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
+ SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
auto checkSVNonOpUses = [&](Instruction *I) {
if (!I || I->getOperand(0)->getType() != VT)
return true;
- return any_of(I->users(), [&](User *U) { return U != Op0 && U != Op1; });
+ return any_of(I->users(), [&](User *U) {
+ return U != Op0 && U != Op1 &&
+ !(isa<ShuffleVectorInst>(U) &&
+ (InputShuffles.contains(cast<Instruction>(U)) ||
+ isInstructionTriviallyDead(cast<Instruction>(U))));
+ });
};
if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
@@ -1271,6 +1281,9 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
auto *SV = dyn_cast<ShuffleVectorInst>(U);
if (!SV || SV->getType() != VT)
return false;
+ if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
+ (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
+ return false;
if (!llvm::is_contained(Shuffles, SV))
Shuffles.push_back(SV);
}
@@ -1283,13 +1296,25 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
if (FromReduction && Shuffles.size() > 1)
return false;
+ // Add any shuffle uses for the shuffles we have found, to include them in our
+ // cost calculations.
+ if (!FromReduction) {
+ for (ShuffleVectorInst *SV : Shuffles) {
+ for (auto *U : SV->users()) {
+ ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U);
+ if (SSV && isa<UndefValue>(SSV->getOperand(1)))
+ Shuffles.push_back(SSV);
+ }
+ }
+ }
+
// For each of the output shuffles, we try to sort all the first vector
// elements to the beginning, followed by the second vector's elements at the
// end. If the binops are legalized to smaller vectors, this may reduce the
// total number of binops. We compute the ReconstructMask needed to convert
// back to the original lane order.
- SmallVector<int> V1, V2;
- SmallVector<SmallVector<int>> ReconstructMasks;
+ SmallVector<std::pair<int, int>> V1, V2;
+ SmallVector<SmallVector<int>> OrigReconstructMasks;
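+ // Each (MaskElt, Index) pair in V1/V2 records an original mask element
+ // together with the position it was assigned in the reconstruct masks, so
+ // entries can still be located after V1 and V2 are sorted below (inferred
+ // from how emplace_back and FindIndex use the pairs).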
int MaxV1Elt = 0, MaxV2Elt = 0;
unsigned NumElts = VT->getNumElements();
for (ShuffleVectorInst *SVN : Shuffles) {
@@ -1300,6 +1325,16 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
// case we need to commute the mask).
Value *SVOp0 = SVN->getOperand(0);
Value *SVOp1 = SVN->getOperand(1);
+ if (isa<UndefValue>(SVOp1)) {
+ auto *SSV = cast<ShuffleVectorInst>(SVOp0);
+ SVOp0 = SSV->getOperand(0);
+ SVOp1 = SSV->getOperand(1);
+ for (unsigned I = 0, E = Mask.size(); I != E; I++) {
+ if (Mask[I] >= static_cast<int>(SSV->getShuffleMask().size()))
+ return false;
+ Mask[I] = Mask[I] < 0 ? Mask[I] : SSV->getMaskValue(Mask[I]);
+ }
+ }
if (SVOp0 == Op1 && SVOp1 == Op0) {
std::swap(SVOp0, SVOp1);
ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
@@ -1316,21 +1351,25 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
ReconstructMask.push_back(-1);
} else if (Mask[I] < static_cast<int>(NumElts)) {
MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
- auto It = find(V1, Mask[I]);
+ auto It = find_if(V1, [&](const std::pair<int, int> &A) {
+ return Mask[I] == A.first;
+ });
if (It != V1.end())
ReconstructMask.push_back(It - V1.begin());
else {
ReconstructMask.push_back(V1.size());
- V1.push_back(Mask[I]);
+ V1.emplace_back(Mask[I], V1.size());
}
} else {
MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
- auto It = find(V2, Mask[I] - NumElts);
+ auto It = find_if(V2, [&](const std::pair<int, int> &A) {
+ return Mask[I] - static_cast<int>(NumElts) == A.first;
+ });
if (It != V2.end())
ReconstructMask.push_back(NumElts + It - V2.begin());
else {
ReconstructMask.push_back(NumElts + V2.size());
- V2.push_back(Mask[I] - NumElts);
+ V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
}
}
}
@@ -1339,7 +1378,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
// result. In-order can help simplify the shuffle away.
if (FromReduction)
sort(ReconstructMask);
- ReconstructMasks.push_back(ReconstructMask);
+ OrigReconstructMasks.push_back(std::move(ReconstructMask));
}
// If the maximum elements used from V1 and V2 are not larger than the new
@@ -1351,16 +1390,68 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
MaxV2Elt == static_cast<int>(V2.size()) - 1))
return false;
+ // GetBaseMaskValue takes one of the inputs, which may be a shuffle, a
+ // shuffle of another shuffle, or not a shuffle at all (which is treated
+ // like an identity shuffle).
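+ // E.g. (illustrative): for '%s2 = shufflevector %s1, undef, m2' where
+ // '%s1 = shufflevector %a, %b, m1' is one of the input shuffles, the
+ // result is m1[m2[M]].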
+ auto GetBaseMaskValue = [&](Instruction *I, int M) {
+ auto *SV = dyn_cast<ShuffleVectorInst>(I);
+ if (!SV)
+ return M;
+ if (isa<UndefValue>(SV->getOperand(1)))
+ if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
+ if (InputShuffles.contains(SSV))
+ return SSV->getMaskValue(SV->getMaskValue(M));
+ return SV->getMaskValue(M);
+ };
+
+ // Attempt to sort the inputs by ascending mask values, to make simpler
+ // input shuffles and push complex shuffles down to the uses. We sort on the
+ // first of the two input shuffle orders, to try to get at least one input
+ // into a nice order.
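+ // E.g. (illustrative): base mask values {3, 0, 2} would be reordered to
+ // {0, 2, 3}, bringing that input shuffle closer to an identity mask.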
+ auto SortBase = [&](Instruction *A, std::pair<int, int> X,
+ std::pair<int, int> Y) {
+ int MXA = GetBaseMaskValue(A, X.first);
+ int MYA = GetBaseMaskValue(A, Y.first);
+ return MXA < MYA;
+ };
+ stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
+ return SortBase(SVI0A, A, B);
+ });
+ stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
+ return SortBase(SVI1A, A, B);
+ });
+ // Calculate our ReconstructMasks from the OrigReconstructMasks and the
+ // modified order of the input shuffles.
+ SmallVector<SmallVector<int>> ReconstructMasks;
+ for (const auto &Mask : OrigReconstructMasks) {
+ SmallVector<int> ReconstructMask;
+ for (int M : Mask) {
+ auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
+ auto It = find_if(V, [M](auto A) { return A.second == M; });
+ assert(It != V.end() && "Expected all entries in Mask");
+ return std::distance(V.begin(), It);
+ };
+ if (M < 0)
+ ReconstructMask.push_back(-1);
+ else if (M < static_cast<int>(NumElts)) {
+ ReconstructMask.push_back(FindIndex(V1, M));
+ } else {
+ ReconstructMask.push_back(NumElts + FindIndex(V2, M));
+ }
+ }
+ ReconstructMasks.push_back(std::move(ReconstructMask));
+ }
+
// Calculate the masks needed for the new input shuffles, which get padded
// with undef.
SmallVector<int> V1A, V1B, V2A, V2B;
for (unsigned I = 0; I < V1.size(); I++) {
- V1A.push_back(SVI0A->getMaskValue(V1[I]));
- V1B.push_back(SVI0B->getMaskValue(V1[I]));
+ V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
+ V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
}
for (unsigned I = 0; I < V2.size(); I++) {
- V2A.push_back(SVI1A->getMaskValue(V2[I]));
- V2B.push_back(SVI1B->getMaskValue(V2[I]));
+ V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
+ V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
}
while (V1A.size() < NumElts) {
V1A.push_back(UndefMaskElem);
@@ -1371,9 +1462,14 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
V2B.push_back(UndefMaskElem);
}
- auto AddShuffleCost = [&](InstructionCost C, ShuffleVectorInst *SV) {
- return C +
- TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, SV->getShuffleMask());
+ auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
+ auto *SV = dyn_cast<ShuffleVectorInst>(I);
+ if (!SV)
+ return C;
+ return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1))
+ ? TTI::SK_PermuteSingleSrc
+ : TTI::SK_PermuteTwoSrc,
+ VT, SV->getShuffleMask());
};
auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) {
return C + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, Mask);
@@ -1386,9 +1482,6 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
TTI.getArithmeticInstrCost(Op1->getOpcode(), VT);
CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
InstructionCost(0), AddShuffleCost);
- // This set helps us only cost each unique shuffle once.
- SmallPtrSet<ShuffleVectorInst *, 4> InputShuffles(
- {SVI0A, SVI0B, SVI1A, SVI1B});
CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
InstructionCost(0), AddShuffleCost);
@@ -1408,22 +1501,35 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
InstructionCost(0), AddShuffleMaskCost);
+ LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
+ LLVM_DEBUG(dbgs() << " CostBefore: " << CostBefore
+ << " vs CostAfter: " << CostAfter << "\n");
if (CostBefore <= CostAfter)
return false;
// The cost model has passed, create the new instructions.
- Builder.SetInsertPoint(SVI0A);
- Value *NSV0A = Builder.CreateShuffleVector(SVI0A->getOperand(0),
- SVI0A->getOperand(1), V1A);
- Builder.SetInsertPoint(SVI0B);
- Value *NSV0B = Builder.CreateShuffleVector(SVI0B->getOperand(0),
- SVI0B->getOperand(1), V1B);
- Builder.SetInsertPoint(SVI1A);
- Value *NSV1A = Builder.CreateShuffleVector(SVI1A->getOperand(0),
- SVI1A->getOperand(1), V2A);
- Builder.SetInsertPoint(SVI1B);
- Value *NSV1B = Builder.CreateShuffleVector(SVI1B->getOperand(0),
- SVI1B->getOperand(1), V2B);
+ auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
+ auto *SV = dyn_cast<ShuffleVectorInst>(I);
+ if (!SV)
+ return I;
+ if (isa<UndefValue>(SV->getOperand(1)))
+ if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
+ if (InputShuffles.contains(SSV))
+ return SSV->getOperand(Op);
+ return SV->getOperand(Op);
+ };
+ Builder.SetInsertPoint(SVI0A->getNextNode());
+ Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
+ GetShuffleOperand(SVI0A, 1), V1A);
+ Builder.SetInsertPoint(SVI0B->getNextNode());
+ Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
+ GetShuffleOperand(SVI0B, 1), V1B);
+ Builder.SetInsertPoint(SVI1A->getNextNode());
+ Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
+ GetShuffleOperand(SVI1A, 1), V2A);
+ Builder.SetInsertPoint(SVI1B->getNextNode());
+ Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
+ GetShuffleOperand(SVI1B, 1), V2B);
Builder.SetInsertPoint(Op0);
Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
NSV0A, NSV0B);